From 69fb09f6ccdb2f070557fd1f4c56c4d646694c8e Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 7 Jul 2017 13:06:34 +0800 Subject: [PATCH 001/253] perf annotate: Check for fused instructions Macro fusion merges two instructions to a single micro-op. Intel core platform performs this hardware optimization under limited circumstances. For example, CMP + JCC can be "fused" and executed /retired together. While with sampling this can result in the sample sometimes being on the JCC and sometimes on the CMP. So for the fused instruction pair, they could be considered together. On Nehalem, fused instruction pairs: cmp/test + jcc. On other new CPU: cmp/test/add/sub/and/inc/dec + jcc. This patch adds an x86-specific function which checks if 2 instructions are in a "fused" pair. For non-x86 arch, the function is just NULL. Changelog: v4: Move the CPU model checking to symbol__disassemble and save the CPU family/model in arch structure. It avoids checking every time when jump arrow printed. v3: Add checking for Nehalem (CMP, TEST). For other newer Intel CPUs just check it by default (CMP, TEST, ADD, SUB, AND, INC, DEC). v2: Remove the original weak function. Arnaldo points out that doing it as a weak function that will be overridden by the host arch doesn't work. So now it's implemented as an arch-specific function. Committer fix: Do not access evsel->evlist->env->cpuid, ->env can be null, introduce perf_evsel__env_cpuid(), just like perf_evsel__env_arch(), also used in this function call. The original patch was segfaulting 'perf top' + annotation. But this essentially disables this fused instructions augmentation in 'perf top', the right thing is to get the cpuid from the running kernel, left for a later patch tho. Signed-off-by: Yao Jin Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1499403995-19857-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/annotate/instructions.c | 46 +++++++++++++++++++++ tools/perf/builtin-top.c | 2 +- tools/perf/ui/browsers/annotate.c | 4 +- tools/perf/ui/gtk/annotate.c | 2 +- tools/perf/util/annotate.c | 22 +++++++++- tools/perf/util/annotate.h | 3 +- tools/perf/util/evsel.c | 7 ++++ tools/perf/util/evsel.h | 1 + 8 files changed, 81 insertions(+), 6 deletions(-) diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index c1625f256df3..d84b72063a30 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -76,3 +76,49 @@ static struct ins x86__instructions[] = { { .name = "xbeginq", .ops = &jump_ops, }, { .name = "retq", .ops = &ret_ops, }, }; + +static bool x86__ins_is_fused(struct arch *arch, const char *ins1, + const char *ins2) +{ + if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp")) + return false; + + if (arch->model == 0x1e) { + /* Nehalem */ + if ((strstr(ins1, "cmp") && !strstr(ins1, "xchg")) || + strstr(ins1, "test")) { + return true; + } + } else { + /* Newer platform */ + if ((strstr(ins1, "cmp") && !strstr(ins1, "xchg")) || + strstr(ins1, "test") || + strstr(ins1, "add") || + strstr(ins1, "sub") || + strstr(ins1, "and") || + strstr(ins1, "inc") || + strstr(ins1, "dec")) { + return true; + } + } + + return false; +} + +static int x86__cpuid_parse(struct arch *arch, char *cpuid) +{ + unsigned int family, model, stepping; + int ret; + + /* + * cpuid = "GenuineIntel,family,model,stepping" + */ + ret = sscanf(cpuid, "%*[^,],%u,%u,%u", &family, &model, &stepping); + if (ret == 3) { + arch->family = family; + arch->model = model; + return 0; + } + + return -1; +} diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6052376634c0..022486dc67f5 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__disassemble(sym, map, NULL, 0, NULL); + err = symbol__disassemble(sym, map, NULL, 0, NULL, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 27f41f28dcb4..c4336138b673 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -9,6 +9,7 @@ #include "../../util/symbol.h" #include "../../util/evsel.h" #include "../../util/config.h" +#include "../../util/evlist.h" #include #include #include @@ -1074,7 +1075,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, } err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - sizeof_bdl, &browser.arch); + sizeof_bdl, &browser.arch, + perf_evsel__env_cpuid(evsel)); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index d903fd493416..87e3760624f2 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, return -1; err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - 0, NULL); + 0, NULL, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index be1caabb9290..8748ebb3f932 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -47,7 +47,12 @@ struct arch { bool sorted_instructions; bool initialized; void *priv; + unsigned int model; + unsigned int family; int (*init)(struct arch *arch); + bool (*ins_is_fused)(struct arch *arch, const char *ins1, + const char *ins2); + int (*cpuid_parse)(struct arch *arch, char *cpuid); struct { char comment_char; char skip_functions_char; @@ -129,6 +134,8 @@ static struct arch architectures[] = { .name = "x86", .instructions = x86__instructions, .nr_instructions = ARRAY_SIZE(x86__instructions), + .ins_is_fused = x86__ins_is_fused, + .cpuid_parse = x86__cpuid_parse, .objdump = { .comment_char = '#', }, @@ -171,6 +178,14 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size, return ins__raw_scnprintf(ins, bf, size, ops); } +bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) +{ + if (!arch || !arch->ins_is_fused) + return false; + + return arch->ins_is_fused(arch, ins1, ins2); +} + static int call__parse(struct arch *arch, struct ins_operands *ops, struct map *map) { char *endptr, *tok, *name; @@ -1381,7 +1396,7 @@ static const char *annotate__norm_arch(const char *arch_name) int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize, - struct arch **parch) + struct arch **parch, char *cpuid) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; @@ -1418,6 +1433,9 @@ int symbol__disassemble(struct symbol *sym, struct map *map, } } + if (arch->cpuid_parse && cpuid) + arch->cpuid_parse(arch, cpuid); + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); @@ -1907,7 +1925,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, u64 len; if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - 0, NULL) < 0) + 0, NULL, NULL) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 21055034aedd..72d72728a0fc 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -53,6 +53,7 @@ bool ins__is_jump(const struct ins *ins); bool ins__is_call(const struct ins *ins); bool ins__is_ret(const struct ins *ins); int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops); +bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); struct annotation; @@ -160,7 +161,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize, - struct arch **parch); + struct arch **parch, char *cpuid); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 413f74df08de..0e4cd6092564 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2610,3 +2610,10 @@ char *perf_evsel__env_arch(struct perf_evsel *evsel) return evsel->evlist->env->arch; return NULL; } + +char *perf_evsel__env_cpuid(struct perf_evsel *evsel) +{ + if (evsel && evsel->evlist && evsel->evlist->env) + return evsel->evlist->env->cpuid; + return NULL; +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d101695c482c..219ad0cdb9f4 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -436,5 +436,6 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); char *perf_evsel__env_arch(struct perf_evsel *evsel); +char *perf_evsel__env_cpuid(struct perf_evsel *evsel); #endif /* __PERF_EVSEL_H */ From 7e63a13a266da652f82731b845b5c35dd866ec7e Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 7 Jul 2017 13:06:35 +0800 Subject: [PATCH 002/253] perf annotate: Implement visual marker for macro fusion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For marking fused instructions clearly this patch adds a line before the first instruction of pair and joins it with the arrow of the jump to its target. For example, when "je" is selected in annotate view, the line before cmpl is displayed and joins the arrow of "je". │ ┌──cmpl $0x0,argp_program_version_hook 81.93 │ ├──je 20 │ │ lock cmpxchg %esi,0x38a9a4(%rip) │ │↓ jne 29 │ │↓ jmp 43 11.47 │20:└─→cmpxch %esi,0x38a999(%rip) That means the cmpl+je is a fused instruction pair and they should be considered together. Changelog: v3: Use Arnaldo's fix to improve the arrow origin rendering. To get the evsel->evlist->env->cpuid, save the evsel in annotate_browser. v2: new function "ins__is_fused" to check if the instructions are fused. Signed-off-by: Yao Jin Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1499403995-19857-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browser.c | 29 +++++++++++++++++++++++++++++ tools/perf/ui/browser.h | 2 ++ tools/perf/ui/browsers/annotate.c | 26 ++++++++++++++++++++++++++ tools/perf/util/annotate.c | 5 +++++ tools/perf/util/annotate.h | 1 + 5 files changed, 63 insertions(+) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 83874b0e266c..f73f3f13e01d 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -738,6 +738,35 @@ void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, __ui_browser__line_arrow_down(browser, column, start, end); } +void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column, + unsigned int row, bool arrow_down) +{ + unsigned int end_row; + + if (row >= browser->top_idx) + end_row = row - browser->top_idx; + else + return; + + SLsmg_set_char_set(1); + + if (arrow_down) { + ui_browser__gotorc(browser, end_row, column - 1); + SLsmg_write_char(SLSMG_ULCORN_CHAR); + ui_browser__gotorc(browser, end_row, column); + SLsmg_draw_hline(2); + ui_browser__gotorc(browser, end_row + 1, column - 1); + SLsmg_write_char(SLSMG_LTEE_CHAR); + } else { + ui_browser__gotorc(browser, end_row, column - 1); + SLsmg_write_char(SLSMG_LTEE_CHAR); + ui_browser__gotorc(browser, end_row, column); + SLsmg_draw_hline(2); + } + + SLsmg_set_char_set(0); +} + void ui_browser__init(void) { int i = 0; diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index be3b70eb5fca..a12eff75638b 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -43,6 +43,8 @@ void ui_browser__printf(struct ui_browser *browser, const char *fmt, ...); void ui_browser__write_graph(struct ui_browser *browser, int graph); void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, u64 start, u64 end); +void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column, + unsigned int row, bool arrow_down); void __ui_browser__show_title(struct ui_browser *browser, const char *title); void ui_browser__show_title(struct ui_browser *browser, const char *title); int ui_browser__show(struct ui_browser *browser, const char *title, diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index c4336138b673..8d3f6f53c122 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -273,6 +273,25 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy return true; } +static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) +{ + struct disasm_line *pos = list_prev_entry(cursor, node); + const char *name; + + if (!pos) + return false; + + if (ins__is_lock(&pos->ins)) + name = pos->ops.locked.ins.name; + else + name = pos->ins.name; + + if (!name || !cursor->ins.name) + return false; + + return ins__is_fused(ab->arch, name, cursor->ins.name); +} + static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); @@ -308,6 +327,13 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS); __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, from, to); + + if (is_fused(ab, cursor)) { + ui_browser__mark_fused(browser, + pcnt_width + 3 + ab->addr_width, + from - 1, + to > from ? true : false); + } } static unsigned int annotate_browser__refresh(struct ui_browser *browser) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 8748ebb3f932..ef434b53d849 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -517,6 +517,11 @@ bool ins__is_ret(const struct ins *ins) return ins->ops == &ret_ops; } +bool ins__is_lock(const struct ins *ins) +{ + return ins->ops == &lock_ops; +} + static int ins__key_cmp(const void *name, const void *insp) { const struct ins *ins = insp; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 72d72728a0fc..bac698d7cc6a 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -52,6 +52,7 @@ struct ins_ops { bool ins__is_jump(const struct ins *ins); bool ins__is_call(const struct ins *ins); bool ins__is_ret(const struct ins *ins); +bool ins__is_lock(const struct ins *ins); int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops); bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); From e000e5e33f92f6bb2d24ba2cda143cb6bb872495 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 13 Jul 2017 13:07:00 -0300 Subject: [PATCH 003/253] perf trace: Remove F_ from some of the fcntl command strings The initial ones already had that "F_" prefix stripped to make things shorter, some hadn't, do it now. We do this to make the 'perf trace' output more compact. At some point perhaps the best thing to do is to have the tool do this stripping automatically, letting the user also decide if this is to be done or not. For now, be consistent. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2iot106xkl8rgb0hb8zm3gq5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4b2a5d298197..cfe1858ed074 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -407,9 +407,9 @@ static DEFINE_STRARRAY(whences); static const char *fcntl_cmds[] = { "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK", - "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64", - "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX", - "F_GETOWNER_UIDS", + "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64", + "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX", + "GETOWNER_UIDS", }; static DEFINE_STRARRAY(fcntl_cmds); From 83a516943136456b9983a0cc8ab4fa5a59860898 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 13 Jul 2017 17:14:11 -0300 Subject: [PATCH 004/253] perf trace: Beautify linux specific fcntl commands We were only beautifying (transforming from an integer to its name) the non-linux specific fcntl syscall cmd args, fix it: Before: # perf trace -e fcntl -p 2472 0.000 ( 0.017 ms): gnome-terminal/2472 fcntl(fd: 55, cmd: 1030) = 56 ^C# After: # trace -e fcntl -p 2472 0.000 ( 0.015 ms): gnome-terminal/2472 fcntl(fd: 55, cmd: DUPFD_CLOEXEC) = 56 ^C# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-zigsxruk4wbfn8iylboy9wzo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 57 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index cfe1858ed074..431ef70067ed 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -64,6 +64,10 @@ # define O_CLOEXEC 02000000 #endif +#ifndef F_LINUX_SPECIFIC_BASE +# define F_LINUX_SPECIFIC_BASE 1024 +#endif + struct trace { struct perf_tool tool; struct syscalltbl *sctbl; @@ -317,6 +321,38 @@ static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, #define SCA_STRARRAY syscall_arg__scnprintf_strarray +struct strarrays { + int nr_entries; + struct strarray **entries; +}; + +#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \ + .nr_entries = ARRAY_SIZE(array), \ + .entries = array, \ +} + +static size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, + struct syscall_arg *arg) +{ + struct strarrays *sas = arg->parm; + int i; + + for (i = 0; i < sas->nr_entries; ++i) { + struct strarray *sa = sas->entries[i]; + int idx = arg->val - sa->offset; + + if (idx >= 0 && idx < sa->nr_entries) { + if (sa->entries[idx] == NULL) + break; + return scnprintf(bf, size, "%s", sa->entries[idx]); + } + } + + return scnprintf(bf, size, "%d", arg->val); +} + +#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays + #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches as soon as the ioctl beautifier @@ -413,6 +449,20 @@ static const char *fcntl_cmds[] = { }; static DEFINE_STRARRAY(fcntl_cmds); +static const char *fcntl_linux_specific_cmds[] = { + "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC", + "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS", +}; + +static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE); + +static struct strarray *fcntl_cmds_arrays[] = { + &strarray__fcntl_cmds, + &strarray__fcntl_linux_specific_cmds, +}; + +static DEFINE_STRARRAYS(fcntl_cmds_arrays); + static const char *rlimit_resources[] = { "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE", "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO", @@ -613,8 +663,8 @@ static struct syscall_fmt { { .name = "fchownat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fcntl", .errmsg = true, - .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ }, - .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, + .arg_scnprintf = { [1] = SCA_STRARRAYS, /* cmd */ }, + .arg_parm = { [1] = &strarrays__fcntl_cmds_arrays, /* cmd */ }, }, { .name = "fdatasync", .errmsg = true, }, { .name = "flock", .errmsg = true, .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, }, @@ -1356,7 +1406,8 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, */ if (val == 0 && !(sc->arg_scnprintf && - sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && + (sc->arg_scnprintf[arg.idx] == SCA_STRARRAY || + sc->arg_scnprintf[arg.idx] == SCA_STRARRAYS) && sc->arg_parm[arg.idx])) continue; From ca3cf049d2430ac83eff5c32af7b86bb2d1285bf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 13 Jul 2017 17:15:24 -0300 Subject: [PATCH 005/253] tools: Update include/uapi/linux/fcntl.h copy from the kernel To get the changes in the commit c75b1d9421f8 ("fs: add fcntl() interface for setting/getting write life time hints"). Silencing this perf build warning: Warning: include/uapi/linux/fcntl.h differs from kernel We already beautify the fcntl cmd argument, so an upcoming cset will update the 'cmd' strarray to cover these new commands. The hints are in the 3rd arg, a pointer, so not yet supported in 'perf trace', for that we need to copy it somehow, probably using eBPF, a new attempt at doing that is planned. Cc: Adrian Hunter Cc: David Ahern Cc: Jens Axboe Cc: Jiri Olsa Cc: Martin K. Petersen Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-al471wzs3x48alql0tm3mnfa@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fcntl.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 813afd6eee71..ec69d55bcec7 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -42,6 +42,27 @@ #define F_SEAL_WRITE 0x0008 /* prevent writes */ /* (1U << 31) is reserved for signed error codes */ +/* + * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the + * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on + * the specific file. + */ +#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11) +#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12) +#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13) +#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14) + +/* + * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be + * used to clear any hints previously set. + */ +#define RWF_WRITE_LIFE_NOT_SET 0 +#define RWH_WRITE_LIFE_NONE 1 +#define RWH_WRITE_LIFE_SHORT 2 +#define RWH_WRITE_LIFE_MEDIUM 3 +#define RWH_WRITE_LIFE_LONG 4 +#define RWH_WRITE_LIFE_EXTREME 5 + /* * Types of directory notifications that may be requested. */ From 274e86fdd379992f90f37fab8df3f640c50fd2cb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 09:38:38 -0300 Subject: [PATCH 006/253] perf trace beauty: Export the strarrays scnprintf method As we'll call it from the fcntl cmd scnprintf method, that needs to look at the cmd to mask the next fcntl argument when it is ignored. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-fzlvkhew5vbxefneuciihgbc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 6 ++---- tools/perf/trace/beauty/beauty.h | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 431ef70067ed..ef1b1d4ea007 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -331,8 +331,8 @@ struct strarrays { .entries = array, \ } -static size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, - struct syscall_arg *arg) +size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, + struct syscall_arg *arg) { struct strarrays *sas = arg->parm; int i; @@ -351,8 +351,6 @@ static size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, return scnprintf(bf, size, "%d", arg->val); } -#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays - #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches as soon as the ioctl beautifier diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index cf50be3f17a4..a6348073a6e9 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -15,6 +15,9 @@ struct syscall_arg { u8 mask; }; +size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays + size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags From 5ca55ab6def81f8cd19a8b88f2ba4abe1aed34cc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 16:25:35 -0300 Subject: [PATCH 007/253] perf trace: Only build tools/perf/trace/beauty/ when building 'perf trace' As it calls functions in builtin-trace.c. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-bt3lhw1rvy3jzbsp2fvvegb0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Build b/tools/perf/Build index bd8eeb60533c..b48ca40fccf9 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -50,6 +50,6 @@ libperf-y += util/ libperf-y += arch/ libperf-y += ui/ libperf-y += scripts/ -libperf-y += trace/beauty/ +libperf-$(CONFIG_AUDIT) += trace/beauty/ gtk-y += ui/gtk/ From 9cb7cf86440229fd6e6ad1718712742c344653d8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 09:44:50 -0300 Subject: [PATCH 008/253] perf trace beauty: Mask ignored fcntl 'arg' parameter A series of fcntl cmds ignore the third argument, so mask it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-6vtl3zq1tauamrhm8o380ptn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 +- tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 3 +++ tools/perf/trace/beauty/fcntl.c | 23 +++++++++++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 tools/perf/trace/beauty/fcntl.c diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ef1b1d4ea007..b7f79dea3c44 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -661,7 +661,7 @@ static struct syscall_fmt { { .name = "fchownat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fcntl", .errmsg = true, - .arg_scnprintf = { [1] = SCA_STRARRAYS, /* cmd */ }, + .arg_scnprintf = { [1] = SCA_FCNTL_CMD, /* cmd */ }, .arg_parm = { [1] = &strarrays__fcntl_cmds_arrays, /* cmd */ }, }, { .name = "fdatasync", .errmsg = true, }, { .name = "flock", .errmsg = true, diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index be95ac6ce845..c9e215b806f1 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -1 +1,2 @@ +libperf-y += fcntl.o libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index a6348073a6e9..ce01079d8422 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -18,6 +18,9 @@ struct syscall_arg { size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STRARRAYS syscall_arg__scnprintf_strarrays +size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_FCNTL_CMD syscall_arg__scnprintf_fcntl_cmd + size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c new file mode 100644 index 000000000000..7e4582c9308e --- /dev/null +++ b/tools/perf/trace/beauty/fcntl.c @@ -0,0 +1,23 @@ +/* + * trace/beauty/fcntl.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include + +size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg) +{ + /* + * Some commands ignore the third fcntl argument, "arg", so mask it + */ + if (arg->val == F_GETFD || arg->val == F_GETFL || + arg->val == F_GETOWN || arg->val == F_GET_SEALS || + arg->val == F_GETLEASE || arg->val == F_GETSIG) + arg->mask |= (1 << 2); + + return syscall_arg__scnprintf_strarrays(bf, size, arg); +} From f9f83b3344f9ed6e1d33a87810f38eca5ed6a14f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 10:13:56 -0300 Subject: [PATCH 009/253] perf trace beauty: Allow accessing syscall args values in a syscall arg formatter For instance, fcntl's upcoming 'arg' formatter needs to look at the 'cmd' value to decide how to format its value, sometimes it is a file flags, sometimes an fd, a pointer to a structure, etc. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2tw2jfaqm48dtw8a4addghze@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 23 ++++++++++++++++------- tools/perf/trace/beauty/beauty.h | 13 +++++++++++++ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b7f79dea3c44..40bc0a326096 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1369,19 +1369,32 @@ static int trace__validate_ev_qualifier(struct trace *trace) * variable to read it. Most notably this avoids extended load instructions * on unaligned addresses */ +static unsigned long __syscall_arg__val(unsigned char *args, u8 idx) +{ + unsigned long val; + unsigned char *p = args + sizeof(unsigned long) * idx; + + memcpy(&val, p, sizeof(val)); + return val; +} + +unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx) +{ + return __syscall_arg__val(arg->args, idx); +} static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, unsigned char *args, struct trace *trace, struct thread *thread) { size_t printed = 0; - unsigned char *p; unsigned long val; if (sc->args != NULL) { struct format_field *field; u8 bit = 1; struct syscall_arg arg = { + .args = args, .idx = 0, .mask = 0, .trace = trace, @@ -1393,9 +1406,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (arg.mask & bit) continue; - /* special care for unaligned accesses */ - p = args + sizeof(unsigned long) * arg.idx; - memcpy(&val, p, sizeof(val)); + val = syscall_arg__val(&arg, arg.idx); /* * Suppress this argument if its value is zero and @@ -1431,9 +1442,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, int i = 0; while (i < 6) { - /* special care for unaligned accesses */ - p = args + sizeof(unsigned long) * i; - memcpy(&val, p, sizeof(val)); + val = __syscall_arg__val(args, i); printed += scnprintf(bf + printed, size - printed, "%sarg%d: %ld", printed ? ", " : "", i, val); diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index ce01079d8422..6fbac0c8120d 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -6,8 +6,19 @@ struct trace; struct thread; +/** + * @val: value of syscall argument being formatted + * @args: All the args, use syscall_args__val(arg, nth) to access one + * @thread: tid state (maps, pid, tid, etc) + * @trace: 'perf trace' internals: all threads, etc + * @parm: private area, may be an strarray, for instance + * @idx: syscall arg idx (is this the first?) + * @mask: a syscall arg may mask another arg, see syscall_arg__scnprintf_futex_op + */ + struct syscall_arg { unsigned long val; + unsigned char *args; struct thread *thread; struct trace *trace; void *parm; @@ -15,6 +26,8 @@ struct syscall_arg { u8 mask; }; +unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx); + size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STRARRAYS syscall_arg__scnprintf_strarrays From 2c2b1623d46ce1f9c0ef5d21ff64a2c3f960e489 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 10:19:18 -0300 Subject: [PATCH 010/253] perf trace beauty: Export the "int" and "hex" syscall arg formatters The most basic ones, for pointers, unaugmented fds, etc, to be used in the initial fcntl 'arg' beautifier. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-g0lugj4vv6p4jtge32hid6q6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 10 ++-------- tools/perf/trace/beauty/beauty.h | 6 ++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 40bc0a326096..9e171401b664 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -392,22 +392,16 @@ static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd -static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, - struct syscall_arg *arg) +size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg) { return scnprintf(bf, size, "%#lx", arg->val); } -#define SCA_HEX syscall_arg__scnprintf_hex - -static size_t syscall_arg__scnprintf_int(char *bf, size_t size, - struct syscall_arg *arg) +size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg) { return scnprintf(bf, size, "%d", arg->val); } -#define SCA_INT syscall_arg__scnprintf_int - static const char *bpf_cmd[] = { "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", "MAP_GET_NEXT_KEY", "PROG_LOAD", diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 6fbac0c8120d..d085aacecd49 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -31,6 +31,12 @@ unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx); size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STRARRAYS syscall_arg__scnprintf_strarrays +size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_HEX syscall_arg__scnprintf_hex + +size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_INT syscall_arg__scnprintf_int + size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FCNTL_CMD syscall_arg__scnprintf_fcntl_cmd From 5dde91edbf6776daba0981c5bc47f1240427e05c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 10:34:16 -0300 Subject: [PATCH 011/253] perf trace beauty: Introduce syscall arg beautifier for long integers Will be used in the fcntl arg beautifier, that nowadays formats as '%ld' because there is no explicit arg beautifier attached, but as we will have to first decide what beautifier to use (i.e. it may be a pointer, etc) then we need to have this exported as a separate beautifier to be called from there. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-d7bfs3m8m70j3zckeam0kk5d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 5 +++++ tools/perf/trace/beauty/beauty.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 9e171401b664..a323736043e1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -402,6 +402,11 @@ size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg return scnprintf(bf, size, "%d", arg->val); } +size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg) +{ + return scnprintf(bf, size, "%ld", arg->val); +} + static const char *bpf_cmd[] = { "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", "MAP_GET_NEXT_KEY", "PROG_LOAD", diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index d085aacecd49..61aec19e55ec 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -37,6 +37,9 @@ size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg); #define SCA_INT syscall_arg__scnprintf_int +size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_LONG syscall_arg__scnprintf_long + size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FCNTL_CMD syscall_arg__scnprintf_fcntl_cmd From 84d1d8a12df33a3e707866b9a1f9872058c3d1fb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Jul 2017 17:09:21 -0300 Subject: [PATCH 012/253] tools include uapi asm-generic: Grab a copy of fcntl.h We'll need defines for beautifying fcntl arguments that are not available in older distros, these: trace/beauty/fcntl.c: In function 'syscall_arg__scnprintf_fcntl_arg': trace/beauty/fcntl.c:93: error: 'F_OFD_SETLK' undeclared (first use in this function) trace/beauty/fcntl.c:93: error: (Each undeclared identifier is reported only once trace/beauty/fcntl.c:93: error: for each function it appears in.) trace/beauty/fcntl.c:93: error: 'F_OFD_SETLKW' undeclared (first use in this function) trace/beauty/fcntl.c:93: error: 'F_OFD_GETLK' undeclared (first use in this function) trace/beauty/fcntl.c:94: error: 'F_GETOWN_EX' undeclared (first use in this function) trace/beauty/fcntl.c:94: error: 'F_SETOWN_EX' undeclared (first use in this function) Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-gvlw67a47e9z65jdunj4je5s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/fcntl.h | 220 +++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 tools/include/uapi/asm-generic/fcntl.h diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h new file mode 100644 index 000000000000..ac190958c981 --- /dev/null +++ b/tools/include/uapi/asm-generic/fcntl.h @@ -0,0 +1,220 @@ +#ifndef _ASM_GENERIC_FCNTL_H +#define _ASM_GENERIC_FCNTL_H + +#include + +/* + * FMODE_EXEC is 0x20 + * FMODE_NONOTIFY is 0x4000000 + * These cannot be used by userspace O_* until internal and external open + * flags are split. + * -Eric Paris + */ + +/* + * When introducing new O_* bits, please check its uniqueness in fcntl_init(). + */ + +#define O_ACCMODE 00000003 +#define O_RDONLY 00000000 +#define O_WRONLY 00000001 +#define O_RDWR 00000002 +#ifndef O_CREAT +#define O_CREAT 00000100 /* not fcntl */ +#endif +#ifndef O_EXCL +#define O_EXCL 00000200 /* not fcntl */ +#endif +#ifndef O_NOCTTY +#define O_NOCTTY 00000400 /* not fcntl */ +#endif +#ifndef O_TRUNC +#define O_TRUNC 00001000 /* not fcntl */ +#endif +#ifndef O_APPEND +#define O_APPEND 00002000 +#endif +#ifndef O_NONBLOCK +#define O_NONBLOCK 00004000 +#endif +#ifndef O_DSYNC +#define O_DSYNC 00010000 /* used to be O_SYNC, see below */ +#endif +#ifndef FASYNC +#define FASYNC 00020000 /* fcntl, for BSD compatibility */ +#endif +#ifndef O_DIRECT +#define O_DIRECT 00040000 /* direct disk access hint */ +#endif +#ifndef O_LARGEFILE +#define O_LARGEFILE 00100000 +#endif +#ifndef O_DIRECTORY +#define O_DIRECTORY 00200000 /* must be a directory */ +#endif +#ifndef O_NOFOLLOW +#define O_NOFOLLOW 00400000 /* don't follow links */ +#endif +#ifndef O_NOATIME +#define O_NOATIME 01000000 +#endif +#ifndef O_CLOEXEC +#define O_CLOEXEC 02000000 /* set close_on_exec */ +#endif + +/* + * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using + * the O_SYNC flag. We continue to use the existing numerical value + * for O_DSYNC semantics now, but using the correct symbolic name for it. + * This new value is used to request true Posix O_SYNC semantics. It is + * defined in this strange way to make sure applications compiled against + * new headers get at least O_DSYNC semantics on older kernels. + * + * This has the nice side-effect that we can simply test for O_DSYNC + * wherever we do not care if O_DSYNC or O_SYNC is used. + * + * Note: __O_SYNC must never be used directly. + */ +#ifndef O_SYNC +#define __O_SYNC 04000000 +#define O_SYNC (__O_SYNC|O_DSYNC) +#endif + +#ifndef O_PATH +#define O_PATH 010000000 +#endif + +#ifndef __O_TMPFILE +#define __O_TMPFILE 020000000 +#endif + +/* a horrid kludge trying to make sure that this will fail on old kernels */ +#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) +#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT) + +#ifndef O_NDELAY +#define O_NDELAY O_NONBLOCK +#endif + +#define F_DUPFD 0 /* dup */ +#define F_GETFD 1 /* get close_on_exec */ +#define F_SETFD 2 /* set/clear close_on_exec */ +#define F_GETFL 3 /* get file->f_flags */ +#define F_SETFL 4 /* set file->f_flags */ +#ifndef F_GETLK +#define F_GETLK 5 +#define F_SETLK 6 +#define F_SETLKW 7 +#endif +#ifndef F_SETOWN +#define F_SETOWN 8 /* for sockets. */ +#define F_GETOWN 9 /* for sockets. */ +#endif +#ifndef F_SETSIG +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. */ +#endif + +#ifndef CONFIG_64BIT +#ifndef F_GETLK64 +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 +#endif +#endif + +#ifndef F_SETOWN_EX +#define F_SETOWN_EX 15 +#define F_GETOWN_EX 16 +#endif + +#ifndef F_GETOWNER_UIDS +#define F_GETOWNER_UIDS 17 +#endif + +/* + * Open File Description Locks + * + * Usually record locks held by a process are released on *any* close and are + * not inherited across a fork(). + * + * These cmd values will set locks that conflict with process-associated + * record locks, but are "owned" by the open file description, not the + * process. This means that they are inherited across fork() like BSD (flock) + * locks, and they are only released automatically when the last reference to + * the the open file against which they were acquired is put. + */ +#define F_OFD_GETLK 36 +#define F_OFD_SETLK 37 +#define F_OFD_SETLKW 38 + +#define F_OWNER_TID 0 +#define F_OWNER_PID 1 +#define F_OWNER_PGRP 2 + +struct f_owner_ex { + int type; + __kernel_pid_t pid; +}; + +/* for F_[GET|SET]FL */ +#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ + +/* for posix fcntl() and lockf() */ +#ifndef F_RDLCK +#define F_RDLCK 0 +#define F_WRLCK 1 +#define F_UNLCK 2 +#endif + +/* for old implementation of bsd flock () */ +#ifndef F_EXLCK +#define F_EXLCK 4 /* or 3 */ +#define F_SHLCK 8 /* or 4 */ +#endif + +/* operations for bsd flock(), also used by the kernel implementation */ +#define LOCK_SH 1 /* shared lock */ +#define LOCK_EX 2 /* exclusive lock */ +#define LOCK_NB 4 /* or'd with one of the above to prevent + blocking */ +#define LOCK_UN 8 /* remove lock */ + +#define LOCK_MAND 32 /* This is a mandatory flock ... */ +#define LOCK_READ 64 /* which allows concurrent read operations */ +#define LOCK_WRITE 128 /* which allows concurrent write operations */ +#define LOCK_RW 192 /* which allows concurrent read & write ops */ + +#define F_LINUX_SPECIFIC_BASE 1024 + +#ifndef HAVE_ARCH_STRUCT_FLOCK +#ifndef __ARCH_FLOCK_PAD +#define __ARCH_FLOCK_PAD +#endif + +struct flock { + short l_type; + short l_whence; + __kernel_off_t l_start; + __kernel_off_t l_len; + __kernel_pid_t l_pid; + __ARCH_FLOCK_PAD +}; +#endif + +#ifndef HAVE_ARCH_STRUCT_FLOCK64 +#ifndef __ARCH_FLOCK64_PAD +#define __ARCH_FLOCK64_PAD +#endif + +struct flock64 { + short l_type; + short l_whence; + __kernel_loff_t l_start; + __kernel_loff_t l_len; + __kernel_pid_t l_pid; + __ARCH_FLOCK64_PAD +}; +#endif + +#endif /* _ASM_GENERIC_FCNTL_H */ From 9c47f66748381ecbaa3a5eb301b81fd2fd0a282d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 11:05:40 -0300 Subject: [PATCH 013/253] perf trace beauty fcntl: Basic 'arg' beautifier Sometimes it should be printed as an hex number, like with F_SETLK, F_SETLKW and F_GETLK, that treat 'arg' as a struct flock pointer, in other cases it is just an integer. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2gykg6enk7vos6q0m97hkgsg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 ++- tools/perf/trace/beauty/beauty.h | 3 +++ tools/perf/trace/beauty/fcntl.c | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a323736043e1..2a894ea7f540 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -660,7 +660,8 @@ static struct syscall_fmt { { .name = "fchownat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fcntl", .errmsg = true, - .arg_scnprintf = { [1] = SCA_FCNTL_CMD, /* cmd */ }, + .arg_scnprintf = { [1] = SCA_FCNTL_CMD, /* cmd */ + [2] = SCA_FCNTL_ARG, /* arg */ }, .arg_parm = { [1] = &strarrays__fcntl_cmds_arrays, /* cmd */ }, }, { .name = "fdatasync", .errmsg = true, }, { .name = "flock", .errmsg = true, diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 61aec19e55ec..d76f903bf2ed 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -43,6 +43,9 @@ size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *ar size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FCNTL_CMD syscall_arg__scnprintf_fcntl_cmd +size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_FCNTL_ARG syscall_arg__scnprintf_fcntl_arg + size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index 7e4582c9308e..8a5f58d9eb7c 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -21,3 +21,21 @@ size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_ar return syscall_arg__scnprintf_strarrays(bf, size, arg); } + +size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_arg *arg) +{ + int cmd = syscall_arg__val(arg, 1); + + /* + * We still don't grab the contents of pointers on entry or exit, + * so just print them as hex numbers + */ + if (cmd == F_SETLK || cmd == F_SETLKW || cmd == F_GETLK || + cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW || cmd == F_OFD_GETLK || + cmd == F_GETOWN_EX || cmd == F_SETOWN_EX || + cmd == F_GET_RW_HINT || cmd == F_SET_RW_HINT || + cmd == F_GET_FILE_RW_HINT || cmd == F_SET_FILE_RW_HINT) + return syscall_arg__scnprintf_hex(bf, size, arg); + + return syscall_arg__scnprintf_long(bf, size, arg); +} From 64e4561d17c54b64368735bdf89d6873f550d918 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 13 Jul 2017 17:34:46 -0300 Subject: [PATCH 014/253] perf trace: Beautify new write hint fcntl commands Those introduced by the commit c75b1d9421f8 ("fs: add fcntl() interface for setting/getting write life time hints"), tested using the proggie in that commit comment: # perf trace -e fcntl ./write_hint write_hint.c fcntl: F_GET_RW_HINT: Invalid argument 0.000 ( 0.006 ms): write_hint/7576 fcntl(fd: 3, cmd: GET_RW_HINT, arg: 0x7ffc6c918da0) = -1 EINVAL Invalid argument 0.014 ( 0.004 ms): write_hint/7576 fcntl(fd: 4, cmd: GETFL) = 33794 # perf trace -e fcntl ./write_hint write_hint.c 1 fcntl: F_SET_RW_HINT: Invalid argument 0.000 ( 0.007 ms): write_hint/7578 fcntl(fd: 3, cmd: SET_RW_HINT, arg: 0x7fff03866d70) = -1 EINVAL Invalid argument 0.019 ( 0.002 ms): write_hint/7578 fcntl(fd: 4, cmd: GETFL) = 33794 # Cc: Adrian Hunter Cc: David Ahern Cc: Jens Axboe Cc: Jiri Olsa Cc: Martin K. Petersen Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-iacglkc99cchou87k62736dn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2a894ea7f540..1f070bc43742 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -449,6 +449,7 @@ static DEFINE_STRARRAY(fcntl_cmds); static const char *fcntl_linux_specific_cmds[] = { "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC", "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS", + "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT", }; static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE); From b239ad28a8c9571c45e62e486ce75ebad17de6c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 11:18:41 -0300 Subject: [PATCH 015/253] perf beauty open: Detach the syscall_arg agnostic bits from the flags formatter We may want to use this in other contexts, like when formatting the return of fcntl(fd, F_GETFL). Make it have the following signature, so that we can set the formatter for the return argument while processing the arguments of a syscall, as fcntl, for instance, may return fds, flags, etc, so need different return value formatters: size_t formatter(unsigned long value, char *bf, size_t size); This gets so detached from 'perf trace' internals that we may well get all these and move to a tools/lib/syscall_beauty/ library at some point and use it in other tools/ living utilities. Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-9aw8t22ztvnkuv26l6sw1c18@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/beauty.h | 2 ++ tools/perf/trace/beauty/open_flags.c | 19 +++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index d76f903bf2ed..c05bb2ecf502 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -52,4 +52,6 @@ size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask +size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size); + #endif /* _PERF_TRACE_BEAUTY_H */ diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c index f55a4597fc38..b3c7f1155d80 100644 --- a/tools/perf/trace/beauty/open_flags.c +++ b/tools/perf/trace/beauty/open_flags.c @@ -14,13 +14,9 @@ #define O_NOATIME 01000000 #endif -static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, - struct syscall_arg *arg) +size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size) { - int printed = 0, flags = arg->val; - - if (!(flags & O_CREAT)) - arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ + int printed = 0; if (flags == 0) return scnprintf(bf, size, "RDONLY"); @@ -68,4 +64,15 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, return printed; } +static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + int flags = arg->val; + + if (!(flags & O_CREAT)) + arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */ + + return open__scnprintf_flags(flags, bf, size); +} + + #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags From 84486caad942bfea338716e22010a754a0ca9199 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 11:35:03 -0300 Subject: [PATCH 016/253] perf trace: Allow syscall_arg beautifiers to set a different return formatter Things like fcntl will use this to set the right formatter based on its 'cmd' argument. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-4ea3wplb8b4j7aymj0d5uo0h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 27 ++++++++++++++++++++++++++- tools/perf/trace/beauty/beauty.h | 3 +++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1f070bc43742..d48981c36b70 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -908,6 +908,8 @@ static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) * filename.ptr: The filename char pointer that will be vfs_getname'd * filename.entry_str_pos: Where to insert the string translated from * filename.ptr by the vfs_getname tracepoint/kprobe. + * ret_scnprintf: syscall args may set this to a different syscall return + * formatter, for instance, fcntl may return fds, file flags, etc. */ struct thread_trace { u64 entry_time; @@ -916,6 +918,7 @@ struct thread_trace { unsigned long pfmaj, pfmin; char *entry_str; double runtime_ms; + size_t (*ret_scnprintf)(unsigned long value, char *bf, size_t size); struct { unsigned long ptr; short int entry_str_pos; @@ -966,6 +969,15 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) return NULL; } + +void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, + size_t (*ret_scnprintf)(unsigned long val, char *bf, size_t size)) +{ + struct thread_trace *ttrace = thread__priv(arg->thread); + + ttrace->ret_scnprintf = ret_scnprintf; +} + #define TRACE_PFMAJ (1 << 0) #define TRACE_PFMIN (1 << 1) @@ -1390,6 +1402,14 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, { size_t printed = 0; unsigned long val; + struct thread_trace *ttrace = thread__priv(thread); + + /* + * Things like fcntl will set this in its 'cmd' formatter to pick the + * right formatter for the return value (an fd? file flags?), which is + * not needed for syscalls that always return a given type, say an fd. + */ + ttrace->ret_scnprintf = NULL; if (sc->args != NULL) { struct format_field *field; @@ -1704,7 +1724,12 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, fprintf(trace->output, ") = -1 %s %s", e, emsg); } else if (ret == 0 && sc->fmt->timeout) fprintf(trace->output, ") = 0 Timeout"); - else if (sc->fmt->hexret) + else if (ttrace->ret_scnprintf) { + char bf[1024]; + ttrace->ret_scnprintf(ret, bf, sizeof(bf)); + ttrace->ret_scnprintf = NULL; + fprintf(trace->output, ") = %s", bf); + } else if (sc->fmt->hexret) fprintf(trace->output, ") = %#lx", ret); else if (sc->fmt->errpid) { struct thread *child = machine__find_thread(trace->host, ret, ret); diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index c05bb2ecf502..a9613d2e6cfe 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -54,4 +54,7 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size); +void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, + size_t (*ret_scnprintf)(unsigned long val, char *bf, size_t size)); + #endif /* _PERF_TRACE_BEAUTY_H */ From 6b3d5c97dbbc2a8e1f8d7e7d87b8c6637293e803 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 11:57:28 -0300 Subject: [PATCH 017/253] perf trace beauty open flags: Support O_TMPFILE and O_NOFOLLOW The open syscall flags beautifier wasn't considering those flags, fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ukzoldh4arrl8x2uwjafd22h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/open_flags.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c index b3c7f1155d80..5fb57bed7ea2 100644 --- a/tools/perf/trace/beauty/open_flags.c +++ b/tools/perf/trace/beauty/open_flags.c @@ -14,6 +14,10 @@ #define O_NOATIME 01000000 #endif +#ifndef O_TMPFILE +#define O_TMPFILE 020000000 +#endif + size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size) { int printed = 0; @@ -34,6 +38,8 @@ size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size) P_FLAG(DIRECTORY); P_FLAG(EXCL); P_FLAG(LARGEFILE); + P_FLAG(NOFOLLOW); + P_FLAG(TMPFILE); P_FLAG(NOATIME); P_FLAG(NOCTTY); #ifdef O_NONBLOCK From b84148a910598f3e733e54f3cb736d11911291df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 12:09:31 -0300 Subject: [PATCH 018/253] perf trace beauty open flags: Do not depend on the system's O_LARGEFILE define In x86_64 /usr/include/bits/fcntl.h sets it to zero, so just undef it and use the standard 00100000 value when decoding the open flags arg. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-k28megguz5snwop9obvn9mcr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/open_flags.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c index 5fb57bed7ea2..e520112fdb6e 100644 --- a/tools/perf/trace/beauty/open_flags.c +++ b/tools/perf/trace/beauty/open_flags.c @@ -18,6 +18,9 @@ #define O_TMPFILE 020000000 #endif +#undef O_LARGEFILE +#define O_LARGEFILE 00100000 + size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size) { int printed = 0; From 89e8524abe1c584942e6b05744946877e87dd478 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 12:14:20 -0300 Subject: [PATCH 019/253] perf trace beauty fcntl: Beautify F_GETFL return value The return for fcntl(fd, F_GETFL) is the fd file flags, so reuse the one for the open syscall flags parameter: 997.992 (0.002 ms): Chrome_IOThrea/19863 fcntl(fd: 144, cmd: GETFL) = RDWR|LARGEFILE Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-5nn3n4p4yfs6u0leoq880apc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/fcntl.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index 8a5f58d9eb7c..8dcbc30427c9 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -11,13 +11,19 @@ size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg) { + if (arg->val == F_GETFL) { + syscall_arg__set_ret_scnprintf(arg, open__scnprintf_flags); + goto mask_arg; + } /* * Some commands ignore the third fcntl argument, "arg", so mask it */ - if (arg->val == F_GETFD || arg->val == F_GETFL || + if (arg->val == F_GETFD || arg->val == F_GETOWN || arg->val == F_GET_SEALS || - arg->val == F_GETLEASE || arg->val == F_GETSIG) + arg->val == F_GETLEASE || arg->val == F_GETSIG) { +mask_arg: arg->mask |= (1 << 2); + } return syscall_arg__scnprintf_strarrays(bf, size, arg); } From e07f93c0927050184110e189a69a0b9d73c0c9b1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 14:23:17 -0300 Subject: [PATCH 020/253] perf trace beauty open flags: Move RDRW to the start of the output We were getting: 62597.859 ( 0.005 ms): TaskSchedulerF/18107 fcntl(fd: 194, cmd: GETFL) = LARGEFILE|RDWR Instead of the more familiar (from looking at strace output): 62597.859 ( 0.005 ms): TaskSchedulerF/18107 fcntl(fd: 194, cmd: GETFL) = RDWR|LARGEFILE Fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-d4d9nd88t4bu9y9odbrcb5z6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/open_flags.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c index e520112fdb6e..3c20683d8cfb 100644 --- a/tools/perf/trace/beauty/open_flags.c +++ b/tools/perf/trace/beauty/open_flags.c @@ -33,6 +33,7 @@ size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size) flags &= ~O_##n; \ } + P_FLAG(RDWR); P_FLAG(APPEND); P_FLAG(ASYNC); P_FLAG(CLOEXEC); @@ -53,7 +54,6 @@ size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size) #ifdef O_PATH P_FLAG(PATH); #endif - P_FLAG(RDWR); #ifdef O_DSYNC if ((flags & O_SYNC) == O_SYNC) printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC"); From 12c0c0cef9a64ee30825122ec9d620ed29fcf5ba Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 14:36:24 -0300 Subject: [PATCH 021/253] perf trace beauty fcntl flags: Beautify F_SETFL arg Result: 0.011 (0.001 ms): Chrome_IOThrea/19863 fcntl(fd: 130, cmd: SETFL, arg: RDWR|APPEND|LARGEFILE) = 0 Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-qgf8ggsq9chnjblxlq954deu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/fcntl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index 8dcbc30427c9..3456bf4cdb48 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -32,6 +32,8 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar { int cmd = syscall_arg__val(arg, 1); + if (cmd == F_SETFL) + return open__scnprintf_flags(arg->val, bf, size); /* * We still don't grab the contents of pointers on entry or exit, * so just print them as hex numbers From c2e539d287a2e7c633ad6ecb0cb6b6f13ea05125 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 14:50:52 -0300 Subject: [PATCH 022/253] perf trace beauty fcntl: Beautify F_[GS]ETFD arg/return value Now it will show 0 or CLOEXEC, the only !0 value returned by the kernel for fcntl(fd, F_GETFD). And for F_SETFD: 6870.267 ( 0.004 ms): make/29812 fcntl(fd: 7, cmd: SETFD, arg: CLOEXEC) = 0 6873.805 ( 0.002 ms): make/29816 fcntl(fd: 6, cmd: SETFD, arg: CLOEXEC) = 0 77986.150 ( 0.006 ms): alsa-sink-ALC3/2042 fcntl(fd: 45, cmd: SETFD, arg: CLOEXEC) = 0 77986.271 ( 0.006 ms): alsa-sink-ALC3/2042 fcntl(fd: 23, cmd: SETFD, arg: CLOEXEC) = 0 Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-sz9dob7t4zd6m65femazpaah@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/fcntl.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index 3456bf4cdb48..384f177a4304 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -7,19 +7,28 @@ */ #include "trace/beauty/beauty.h" +#include #include +static size_t fcntl__scnprintf_getfd(unsigned long val, char *bf, size_t size) +{ + return scnprintf(bf, size, "%s", val ? "CLOEXEC" : "0"); +} + size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg) { if (arg->val == F_GETFL) { syscall_arg__set_ret_scnprintf(arg, open__scnprintf_flags); goto mask_arg; } + if (arg->val == F_GETFD) { + syscall_arg__set_ret_scnprintf(arg, fcntl__scnprintf_getfd); + goto mask_arg; + } /* * Some commands ignore the third fcntl argument, "arg", so mask it */ - if (arg->val == F_GETFD || - arg->val == F_GETOWN || arg->val == F_GET_SEALS || + if (arg->val == F_GETOWN || arg->val == F_GET_SEALS || arg->val == F_GETLEASE || arg->val == F_GETSIG) { mask_arg: arg->mask |= (1 << 2); @@ -32,6 +41,9 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar { int cmd = syscall_arg__val(arg, 1); + if (cmd == F_SETFD) + return fcntl__scnprintf_getfd(arg->val, bf, size); + if (cmd == F_SETFL) return open__scnprintf_flags(arg->val, bf, size); /* From 7ee5743404e3641f3c11792761632a5a7d583587 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 15:16:54 -0300 Subject: [PATCH 023/253] perf trace beauty: Give syscall return beautifier more context We need the current thread and the trace internal state so that we can use the fd beautifier to augment syscall returns, so use struct syscall_arg with some fields that make sense on returns (val, thread, trace). Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-lqag8e86ybidrh5zpqne05ov@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 11 ++++++++--- tools/perf/trace/beauty/beauty.h | 5 ++++- tools/perf/trace/beauty/fcntl.c | 9 +++++++-- tools/perf/trace/beauty/open_flags.c | 5 +---- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d48981c36b70..cfa8bf1cca43 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -918,7 +918,7 @@ struct thread_trace { unsigned long pfmaj, pfmin; char *entry_str; double runtime_ms; - size_t (*ret_scnprintf)(unsigned long value, char *bf, size_t size); + size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); struct { unsigned long ptr; short int entry_str_pos; @@ -971,7 +971,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, - size_t (*ret_scnprintf)(unsigned long val, char *bf, size_t size)) + size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)) { struct thread_trace *ttrace = thread__priv(arg->thread); @@ -1726,7 +1726,12 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, fprintf(trace->output, ") = 0 Timeout"); else if (ttrace->ret_scnprintf) { char bf[1024]; - ttrace->ret_scnprintf(ret, bf, sizeof(bf)); + struct syscall_arg arg = { + .val = ret, + .thread = thread, + .trace = trace, + }; + ttrace->ret_scnprintf(bf, sizeof(bf), &arg); ttrace->ret_scnprintf = NULL; fprintf(trace->output, ") = %s", bf); } else if (sc->fmt->hexret) diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index a9613d2e6cfe..b64c4116cdc4 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -46,6 +46,9 @@ size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FCNTL_ARG syscall_arg__scnprintf_fcntl_arg +size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags + size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags @@ -55,6 +58,6 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size); void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, - size_t (*ret_scnprintf)(unsigned long val, char *bf, size_t size)); + size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)); #endif /* _PERF_TRACE_BEAUTY_H */ diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index 384f177a4304..9de80a7eddfa 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -15,14 +15,19 @@ static size_t fcntl__scnprintf_getfd(unsigned long val, char *bf, size_t size) return scnprintf(bf, size, "%s", val ? "CLOEXEC" : "0"); } +static size_t syscall_arg__scnprintf_fcntl_getfd(char *bf, size_t size, struct syscall_arg *arg) +{ + return fcntl__scnprintf_getfd(arg->val, bf, size); +} + size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg) { if (arg->val == F_GETFL) { - syscall_arg__set_ret_scnprintf(arg, open__scnprintf_flags); + syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_open_flags); goto mask_arg; } if (arg->val == F_GETFD) { - syscall_arg__set_ret_scnprintf(arg, fcntl__scnprintf_getfd); + syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fcntl_getfd); goto mask_arg; } /* diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c index 3c20683d8cfb..e359e041dc0e 100644 --- a/tools/perf/trace/beauty/open_flags.c +++ b/tools/perf/trace/beauty/open_flags.c @@ -73,7 +73,7 @@ size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size) return printed; } -static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) +size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) { int flags = arg->val; @@ -82,6 +82,3 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct sy return open__scnprintf_flags(flags, bf, size); } - - -#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags From fc65eb8213a437b43e9de7776699aaa25dfa00df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 15:21:40 -0300 Subject: [PATCH 024/253] perf trace beauty: Export the fd beautifier for use in more places Now that the beautifiers are being split into multiple source and object files, we will need more of them exported, do it for the 'fd' one, will be used to augment the return of some syscalls that may return an 'fd', such as fcntl(fd, F_DUPFD). Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-39sosu12hhywyunqf5s74ewf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 8 +------- tools/perf/trace/beauty/beauty.h | 3 +++ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index cfa8bf1cca43..65fa0126e939 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -365,11 +365,6 @@ static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray #endif /* defined(__i386__) || defined(__x86_64__) */ -static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, - struct syscall_arg *arg); - -#define SCA_FD syscall_arg__scnprintf_fd - #ifndef AT_FDCWD #define AT_FDCWD -100 #endif @@ -1057,8 +1052,7 @@ static const char *thread__fd_path(struct thread *thread, int fd, return ttrace->paths.table[fd]; } -static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, - struct syscall_arg *arg) +size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg) { int fd = arg->val; size_t printed = scnprintf(bf, size, "%d", fd); diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index b64c4116cdc4..790e8307fc6a 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -31,6 +31,9 @@ unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx); size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STRARRAYS syscall_arg__scnprintf_strarrays +size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_FD syscall_arg__scnprintf_fd + size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg); #define SCA_HEX syscall_arg__scnprintf_hex From 07a0572439450e2631f90d595965c379de105bab Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 15:24:19 -0300 Subject: [PATCH 025/253] perf trace beauty fcntl: Augment the return of F_DUPFD(_CLOEXEC) Using the existing 'fd' beautifier, now we can see the path for the just dup'ed fd: 18031.338 ( 0.009 ms): gnome-terminal/2472 fcntl(fd: 55, cmd: DUPFD_CLOEXEC) = 56 Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-z0ggo126p2eobfwnjw9z16tw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/fcntl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index 9de80a7eddfa..254ae435780d 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -30,6 +30,10 @@ size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_ar syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fcntl_getfd); goto mask_arg; } + if (arg->val == F_DUPFD_CLOEXEC || arg->val == F_DUPFD) { + syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fd); + goto out; + } /* * Some commands ignore the third fcntl argument, "arg", so mask it */ @@ -38,7 +42,7 @@ size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_ar mask_arg: arg->mask |= (1 << 2); } - +out: return syscall_arg__scnprintf_strarrays(bf, size, arg); } From ff2f1b2d35aeb672200af4772db1847fdcdfd77b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 15:21:40 -0300 Subject: [PATCH 026/253] perf trace beauty: Export the pid beautifier for use in more places Now that the beautifiers are being split into multiple source and object files, we will need more of them exported, do it for the 'pid' one, will be used to augment the return of some syscalls that may return a 'pid', such as fcntl(fd, F_GETOWN). Will also be used for fcntl(fd, F_SETOWN, pid). Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7gr5nt9p5skp4i1w0ja1w272@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/beauty.h | 3 +++ tools/perf/trace/beauty/pid.c | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 790e8307fc6a..9ccf0f323fe5 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -43,6 +43,9 @@ size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg); #define SCA_LONG syscall_arg__scnprintf_long +size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_PID syscall_arg__scnprintf_pid + size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FCNTL_CMD syscall_arg__scnprintf_fcntl_cmd diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c index 07486ea65ae3..b6d419e16dcf 100644 --- a/tools/perf/trace/beauty/pid.c +++ b/tools/perf/trace/beauty/pid.c @@ -1,4 +1,4 @@ -static size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg) +size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg) { int pid = arg->val; struct trace *trace = arg->trace; @@ -17,5 +17,3 @@ static size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_a return printed; } - -#define SCA_PID syscall_arg__scnprintf_pid From 1a4ad26393bc73d3294bb7919cb4cd2dfffbe57c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jul 2017 15:39:40 -0300 Subject: [PATCH 027/253] perf trace beauty fcntl: Beautify F_GETOWN and F_SETOWN By attaching the pid beautifier to the args in the F_SETOWN case and to the syscall return on the F_GETOWN one. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ea1prtqvao87cdrishce7954@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/fcntl.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index 254ae435780d..d082fd2e594f 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -34,10 +34,14 @@ size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_ar syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fd); goto out; } + if (arg->val == F_GETOWN) { + syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_pid); + goto mask_arg; + } /* * Some commands ignore the third fcntl argument, "arg", so mask it */ - if (arg->val == F_GETOWN || arg->val == F_GET_SEALS || + if (arg->val == F_GET_SEALS || arg->val == F_GETLEASE || arg->val == F_GETSIG) { mask_arg: arg->mask |= (1 << 2); @@ -55,6 +59,9 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar if (cmd == F_SETFL) return open__scnprintf_flags(arg->val, bf, size); + + if (cmd == F_SETOWN) + return syscall_arg__scnprintf_pid(bf, size, arg); /* * We still don't grab the contents of pointers on entry or exit, * so just print them as hex numbers From 8b3cf3d812140dada8e82650c96d3980e5cd1b73 Mon Sep 17 00:00:00 2001 From: Shriya Date: Mon, 19 Jun 2017 12:00:42 +0530 Subject: [PATCH 028/253] perf pmu-events: Support additional POWER8+ PVR in mapfile Add support for POWER8+ PVR 004c0100 for Garrison Signed-off-by: Shriya Reviewed-by: Madhavan Srinivasan Cc: Alexander Shishkin Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1497853842-11023-1-git-send-email-shriyak@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/powerpc/mapfile.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index e925baa0c30b..f03aec70dfc9 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -19,3 +19,4 @@ 004d0000,1,power8.json,core 004d0100,1,power8.json,core 004d0200,1,power8.json,core +004c0100,1,power8.json,core From 826db0f154ba5bee7d913635644a6f61f993a9b3 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 11 Jul 2017 17:16:00 -0400 Subject: [PATCH 029/253] perf vendor events: Add POWER9 PMU events Add POWER9 PMU events. Signed-off-by: Sukadev Bhattiprolu Cc: Andi Kleen Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Link: http://lkml.kernel.org/n/tip-i08irl1x1i914xsikiomvqip@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power9/cache.json | 176 ++++ .../arch/powerpc/power9/floating-point.json | 44 + .../arch/powerpc/power9/frontend.json | 446 ++++++++++ .../arch/powerpc/power9/marked.json | 782 ++++++++++++++++ .../arch/powerpc/power9/memory.json | 158 ++++ .../pmu-events/arch/powerpc/power9/other.json | 836 ++++++++++++++++++ .../arch/powerpc/power9/pipeline.json | 680 ++++++++++++++ .../pmu-events/arch/powerpc/power9/pmc.json | 146 +++ .../arch/powerpc/power9/translation.json | 272 ++++++ 9 files changed, 3540 insertions(+) create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/cache.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/floating-point.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/frontend.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/marked.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/memory.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/other.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/pipeline.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/pmc.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/translation.json diff --git a/tools/perf/pmu-events/arch/powerpc/power9/cache.json b/tools/perf/pmu-events/arch/powerpc/power9/cache.json new file mode 100644 index 000000000000..437c83b7e6af --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/cache.json @@ -0,0 +1,176 @@ +[ + {, + "EventCode": "0x1002A", + "EventName": "PM_CMPLU_STALL_LARX", + "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied", + "PublicDescription": "" + }, + {, + "EventCode": "0x1003C", + "EventName": "PM_CMPLU_STALL_DMISS_L2L3", + "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3", + "PublicDescription": "" + }, + {, + "EventCode": "0x14048", + "EventName": "PM_INST_FROM_ON_CHIP_CACHE", + "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E054", + "EventName": "PM_LD_MISS_L1", + "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load.", + "PublicDescription": "" + }, + {, + "EventCode": "0x400F0", + "EventName": "PM_LD_MISS_L1", + "BriefDescription": "Load Missed L1, at execution time (not gated by finish, which means this counter can be greater than loads finished)", + "PublicDescription": "" + }, + {, + "EventCode": "0x1404A", + "EventName": "PM_INST_FROM_RL2L3_SHR", + "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C058", + "EventName": "PM_DTLB_MISS_16G", + "BriefDescription": "Data TLB Miss page size 16G", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D15C", + "EventName": "PM_MRK_DTLB_MISS_1G", + "BriefDescription": "Marked Data TLB reload (after a miss) page size 2M. Implies radix translation was used", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E056", + "EventName": "PM_CMPLU_STALL_FLUSH_ANY_THREAD", + "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion", + "PublicDescription": "" + }, + {, + "EventCode": "0x101E6", + "EventName": "PM_THRESH_EXC_4096", + "BriefDescription": "Threshold counter exceed a count of 4096", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C01A", + "EventName": "PM_CMPLU_STALL_LHS", + "BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D016", + "EventName": "PM_CMPLU_STALL_FXU", + "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes", + "PublicDescription": "" + }, + {, + "EventCode": "0x24046", + "EventName": "PM_INST_FROM_RL2L3_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2404A", + "EventName": "PM_INST_FROM_RL4", + "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2F140", + "EventName": "PM_MRK_DPTEG_FROM_L2_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D15E", + "EventName": "PM_MRK_DTLB_MISS_16G", + "BriefDescription": "Marked Data TLB Miss page size 16G", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F14A", + "EventName": "PM_MRK_DPTEG_FROM_RMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D156", + "EventName": "PM_MRK_DTLB_MISS_64K", + "BriefDescription": "Marked Data TLB Miss page size 64K", + "PublicDescription": "" + }, + {, + "EventCode": "0x3006C", + "EventName": "PM_RUN_CYC_SMT2_MODE", + "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT2 mode", + "PublicDescription": "" + }, + {, + "EventCode": "0x300F4", + "EventName": "PM_THRD_CONC_RUN_INST", + "BriefDescription": "PPC Instructions Finished by this thread when all threads in the core had the run-latch set", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C014", + "EventName": "PM_CMPLU_STALL_LMQ_FULL", + "BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C016", + "EventName": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT", + "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D014", + "EventName": "PM_CMPLU_STALL_LOAD_FINISH", + "BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D016", + "EventName": "PM_CMPLU_STALL_FXLONG", + "BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D12A", + "EventName": "PM_MRK_DATA_FROM_RL4_CYC", + "BriefDescription": "Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C15E", + "EventName": "PM_MRK_DTLB_MISS_16M", + "BriefDescription": "Marked Data TLB Miss page size 16M", + "PublicDescription": "" + }, + {, + "EventCode": "0x401E4", + "EventName": "PM_MRK_DTLB_MISS", + "BriefDescription": "Marked dtlb miss", + "PublicDescription": "" + }, + {, + "EventCode": "0x401EA", + "EventName": "PM_THRESH_EXC_128", + "BriefDescription": "Threshold counter exceeded a value of 128", + "PublicDescription": "" + }, + {, + "EventCode": "0x400F6", + "EventName": "PM_BR_MPRED_CMPL", + "BriefDescription": "Number of Branch Mispredicts", + "PublicDescription": "" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/floating-point.json b/tools/perf/pmu-events/arch/powerpc/power9/floating-point.json new file mode 100644 index 000000000000..d4e4669c1cf3 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/floating-point.json @@ -0,0 +1,44 @@ +[ + {, + "EventCode": "0x10058", + "EventName": "PM_MEM_LOC_THRESH_IFU", + "BriefDescription": "Local Memory above threshold for IFU speculation control", + "PublicDescription": "" + }, + {, + "EventCode": "0x4505E", + "EventName": "PM_FLOP_CMPL", + "BriefDescription": "Floating Point Operation Finished", + "PublicDescription": "" + }, + {, + "EventCode": "0x1415A", + "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D028", + "EventName": "PM_RADIX_PWC_L2_PDE_FROM_L2", + "BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L2 data cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D154", + "EventName": "PM_MRK_DERAT_MISS_64K", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 64K", + "PublicDescription": "" + }, + {, + "EventCode": "0x30012", + "EventName": "PM_FLUSH_COMPLETION", + "BriefDescription": "The instruction that was next to complete did not complete because it suffered a flush", + "PublicDescription": "" + }, + {, + "EventCode": "0x4016E", + "EventName": "PM_THRESH_NOT_MET", + "BriefDescription": "Threshold counter did not meet threshold", + "PublicDescription": "" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json new file mode 100644 index 000000000000..5da59d15af94 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json @@ -0,0 +1,446 @@ +[ + {, + "EventCode": "0x20036", + "EventName": "PM_BR_2PATH", + "BriefDescription": "Branches that are not strongly biased", + "PublicDescription": "" + }, + {, + "EventCode": "0x40036", + "EventName": "PM_BR_2PATH", + "BriefDescription": "Branches that are not strongly biased", + "PublicDescription": "" + }, + {, + "EventCode": "0x10004", + "EventName": "PM_CMPLU_STALL_LRQ_OTHER", + "BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others", + "PublicDescription": "" + }, + {, + "EventCode": "0x10010", + "EventName": "PM_PMC4_OVERFLOW", + "BriefDescription": "Overflow from counter 4", + "PublicDescription": "" + }, + {, + "EventCode": "0x1001A", + "EventName": "PM_LSU_SRQ_FULL_CYC", + "BriefDescription": "Cycles in which the Store Queue is full on all 4 slices. This is event is not per thread. All the threads will see the same count for this core resource", + "PublicDescription": "" + }, + {, + "EventCode": "0x10020", + "EventName": "PM_PMC4_REWIND", + "BriefDescription": "PMC4 Rewind Event", + "PublicDescription": "" + }, + {, + "EventCode": "0x1003A", + "EventName": "PM_CMPLU_STALL_LSU_FIN", + "BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish", + "PublicDescription": "" + }, + {, + "EventCode": "0x1013E", + "EventName": "PM_MRK_LD_MISS_EXPOSED_CYC", + "BriefDescription": "Marked Load exposed Miss (use edge detect to count #)", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C044", + "EventName": "PM_DATA_FROM_L3_NO_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x15044", + "EventName": "PM_IPTEG_FROM_L3_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x15046", + "EventName": "PM_IPTEG_FROM_L3.1_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x1015E", + "EventName": "PM_MRK_FAB_RSP_RD_T_INTV", + "BriefDescription": "Sampled Read got a T intervention", + "PublicDescription": "" + }, + {, + "EventCode": "0x14054", + "EventName": "PM_INST_PUMP_CPRED", + "BriefDescription": "Pump prediction correct. Counts across all types of pumps for an instruction fetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x15152", + "EventName": "PM_SYNC_MRK_BR_LINK", + "BriefDescription": "Marked Branch and link branch that can cause a synchronous interrupt", + "PublicDescription": "" + }, + {, + "EventCode": "0x1515C", + "EventName": "PM_SYNC_MRK_BR_MPRED", + "BriefDescription": "Marked Branch mispredict that can cause a synchronous interrupt", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E050", + "EventName": "PM_CMPLU_STALL_TEND", + "BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E15E", + "EventName": "PM_MRK_L2_TM_REQ_ABORT", + "BriefDescription": "TM abort", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F054", + "EventName": "PM_TLB_HIT", + "BriefDescription": "Number of times the TLB had the data required by the instruction. Applies to both HPT and RPT", + "PublicDescription": "" + }, + {, + "EventCode": "0x1006A", + "EventName": "PM_NTC_ISSUE_HELD_DARQ_FULL", + "BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it", + "PublicDescription": "" + }, + {, + "EventCode": "0x101E8", + "EventName": "PM_THRESH_EXC_256", + "BriefDescription": "Threshold counter exceed a count of 256", + "PublicDescription": "" + }, + {, + "EventCode": "0x101EC", + "EventName": "PM_THRESH_MET", + "BriefDescription": "threshold exceeded", + "PublicDescription": "" + }, + {, + "EventCode": "0x100F2", + "EventName": "PM_1PLUS_PPC_CMPL", + "BriefDescription": "1 or more ppc insts finished", + "PublicDescription": "" + }, + {, + "EventCode": "0x20114", + "EventName": "PM_MRK_L2_RC_DISP", + "BriefDescription": "Marked Instruction RC dispatched in L2", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C010", + "EventName": "PM_CMPLU_STALL_LSU", + "BriefDescription": "Completion stall by LSU instruction", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C014", + "EventName": "PM_CMPLU_STALL_STORE_FINISH", + "BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C01E", + "EventName": "PM_CMPLU_STALL_SYNC_PMU_INT", + "BriefDescription": "Cycles in which the NTC instruction is waiting for a synchronous PMU interrupt", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D01C", + "EventName": "PM_CMPLU_STALL_STCX", + "BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E01A", + "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT", + "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C124", + "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C042", + "EventName": "PM_DATA_FROM_L3_MEPF", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D14C", + "EventName": "PM_MRK_DATA_FROM_L3.1_ECO_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x25042", + "EventName": "PM_IPTEG_FROM_L3_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x25044", + "EventName": "PM_IPTEG_FROM_L3.1_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x2015E", + "EventName": "PM_MRK_FAB_RSP_RWITM_RTY", + "BriefDescription": "Sampled store did a rwitm and got a rty", + "PublicDescription": "" + }, + {, + "EventCode": "0x24050", + "EventName": "PM_IOPS_CMPL", + "BriefDescription": "Internal Operations completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x24154", + "EventName": "PM_THRESH_ACC", + "BriefDescription": "This event increments every time the threshold event counter ticks. Thresholding must be enabled (via MMCRA) and the thresholding start event must occur for this counter to increment. It will stop incrementing when the thresholding stop event occurs or when thresholding is disabled, until the next time a configured thresholding start event occurs.", + "PublicDescription": "" + }, + {, + "EventCode": "0x2F152", + "EventName": "PM_MRK_FAB_RSP_DCLAIM_CYC", + "BriefDescription": "cycles L2 RC took for a dclaim", + "PublicDescription": "" + }, + {, + "EventCode": "0x200FA", + "EventName": "PM_BR_TAKEN_CMPL", + "BriefDescription": "New event for Branch Taken", + "PublicDescription": "" + }, + {, + "EventCode": "0x30014", + "EventName": "PM_CMPLU_STALL_STORE_FIN_ARB", + "BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe", + "PublicDescription": "" + }, + {, + "EventCode": "0x3001C", + "EventName": "PM_LSU_REJECT_LMQ_FULL", + "BriefDescription": "LSU Reject due to LMQ full (up to 4 per cycles)", + "PublicDescription": "" + }, + {, + "EventCode": "0x30026", + "EventName": "PM_CMPLU_STALL_STORE_DATA", + "BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data", + "PublicDescription": "" + }, + {, + "EventCode": "0x3012A", + "EventName": "PM_MRK_L2_RC_DONE", + "BriefDescription": "Marked RC done", + "PublicDescription": "" + }, + {, + "EventCode": "0x35044", + "EventName": "PM_IPTEG_FROM_L3.1_ECO_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E04A", + "EventName": "PM_DPTEG_FROM_RMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x30154", + "EventName": "PM_MRK_FAB_RSP_DCLAIM", + "BriefDescription": "Marked store had to do a dclaim", + "PublicDescription": "" + }, + {, + "EventCode": "0x3015E", + "EventName": "PM_MRK_FAB_RSP_CLAIM_RTY", + "BriefDescription": "Sampled store did a rwitm and got a rty", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C056", + "EventName": "PM_DTLB_MISS_64K", + "BriefDescription": "Data TLB Miss page size 64K", + "PublicDescription": "" + }, + {, + "EventCode": "0x34050", + "EventName": "PM_INST_SYS_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for an instruction fetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x34052", + "EventName": "PM_INST_SYS_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for an instruction fetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x34056", + "EventName": "PM_CMPLU_STALL_LSU_MFSPR", + "BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned", + "PublicDescription": "" + }, + {, + "EventCode": "0x3515A", + "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC", + "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3515C", + "EventName": "PM_MRK_DATA_FROM_RL4", + "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E15C", + "EventName": "PM_MRK_L2_TM_ST_ABORT_SISTER", + "BriefDescription": "TM marked store abort for this thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x30060", + "EventName": "PM_TM_TRANS_RUN_INST", + "BriefDescription": "Run instructions completed in transactional state (gated by the run latch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x301E6", + "EventName": "PM_MRK_DERAT_MISS", + "BriefDescription": "Erat Miss (TLB Access) All page sizes", + "PublicDescription": "" + }, + {, + "EventCode": "0x301EA", + "EventName": "PM_THRESH_EXC_1024", + "BriefDescription": "Threshold counter exceeded a value of 1024", + "PublicDescription": "" + }, + {, + "EventCode": "0x300FA", + "EventName": "PM_INST_FROM_L3MISS", + "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet", + "PublicDescription": "" + }, + {, + "EventCode": "0x40116", + "EventName": "PM_MRK_LARX_FIN", + "BriefDescription": "Larx finished", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C010", + "EventName": "PM_CMPLU_STALL_STORE_PIPE_ARB", + "BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. This means the instruction is ready to relaunch and tried once but lost arbitration", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C01C", + "EventName": "PM_CMPLU_STALL_ST_FWD", + "BriefDescription": "Completion stall due to store forward", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E012", + "EventName": "PM_CMPLU_STALL_MTFPSCR", + "BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E016", + "EventName": "PM_CMPLU_STALL_LSAQ_ARB", + "BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C12A", + "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC", + "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C044", + "EventName": "PM_DATA_FROM_L3.1_ECO_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x45044", + "EventName": "PM_IPTEG_FROM_L3.1_ECO_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x45048", + "EventName": "PM_IPTEG_FROM_DL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x4504E", + "EventName": "PM_IPTEG_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E042", + "EventName": "PM_DPTEG_FROM_L3", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4015E", + "EventName": "PM_MRK_FAB_RSP_RD_RTY", + "BriefDescription": "Sampled L2 reads retry count", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C056", + "EventName": "PM_DTLB_MISS_16M", + "BriefDescription": "Data TLB Miss page size 16M", + "PublicDescription": "" + }, + {, + "EventCode": "0x44050", + "EventName": "PM_INST_SYS_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for an instruction fetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x44052", + "EventName": "PM_INST_PUMP_MPRED", + "BriefDescription": "Pump misprediction. Counts across all types of pumps for an instruction fetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x44056", + "EventName": "PM_VECTOR_ST_CMPL", + "BriefDescription": "Number of vector store instructions completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F150", + "EventName": "PM_MRK_FAB_RSP_RWITM_CYC", + "BriefDescription": "cycles L2 RC took for a rwitm", + "PublicDescription": "" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/marked.json b/tools/perf/pmu-events/arch/powerpc/power9/marked.json new file mode 100644 index 000000000000..e4d673235830 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/marked.json @@ -0,0 +1,782 @@ +[ + {, + "EventCode": "0x1002C", + "EventName": "PM_L1_DCACHE_RELOADED_ALL", + "BriefDescription": "L1 data cache reloaded for demand. If MMCR1[16] is 1, prefetches will be included as well", + "PublicDescription": "" + }, + {, + "EventCode": "0x10132", + "EventName": "PM_MRK_INST_ISSUED", + "BriefDescription": "Marked instruction issued", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C042", + "EventName": "PM_DATA_FROM_L2", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C046", + "EventName": "PM_DATA_FROM_L3.1_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C048", + "EventName": "PM_DATA_FROM_ON_CHIP_CACHE", + "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x14040", + "EventName": "PM_INST_FROM_L2_NO_CONFLICT", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x14042", + "EventName": "PM_INST_FROM_L2", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x14046", + "EventName": "PM_INST_FROM_L3.1_SHR", + "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x1404C", + "EventName": "PM_INST_FROM_LL4", + "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D14C", + "EventName": "PM_MRK_DATA_FROM_LL4", + "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x15042", + "EventName": "PM_IPTEG_FROM_L2", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x1504E", + "EventName": "PM_IPTEG_FROM_L2MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E042", + "EventName": "PM_DPTEG_FROM_L2", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E044", + "EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E046", + "EventName": "PM_DPTEG_FROM_L3.1_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F14A", + "EventName": "PM_MRK_DPTEG_FROM_RL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F14C", + "EventName": "PM_MRK_DPTEG_FROM_LL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1005C", + "EventName": "PM_CMPLU_STALL_DP", + "BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by NOT vector", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C052", + "EventName": "PM_DATA_GRP_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C054", + "EventName": "PM_DATA_PUMP_CPRED", + "BriefDescription": "Pump prediction correct. Counts across all types of pumps for a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C05E", + "EventName": "PM_MEM_LOC_THRESH_LSU_MED", + "BriefDescription": "Local memory above threshold for data prefetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x1415E", + "EventName": "PM_MRK_DATA_FROM_L3MISS_CYC", + "BriefDescription": "Duration in cycles to reload from a location other than the local core's L3 due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D058", + "EventName": "PM_DARQ0_10_12_ENTRIES", + "BriefDescription": "Cycles in which 10 or more DARQ entries (out of 12) are in use", + "PublicDescription": "" + }, + {, + "EventCode": "0x15150", + "EventName": "PM_SYNC_MRK_PROBE_NOP", + "BriefDescription": "Marked probeNops which can cause synchronous interrupts", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E052", + "EventName": "PM_CMPLU_STALL_SLB", + "BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F150", + "EventName": "PM_MRK_ST_L2DISP_TO_CMPL_CYC", + "BriefDescription": "cycles from L2 rc disp to l2 rc completion", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F05A", + "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L2", + "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L2 data cache. This is the deepest level of PWC possible for a translation", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F05C", + "EventName": "PM_RADIX_PWC_L3_PDE_FROM_L3", + "BriefDescription": "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L3 data cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x1006C", + "EventName": "PM_RUN_CYC_ST_MODE", + "BriefDescription": "Cycles run latch is set and core is in ST mode", + "PublicDescription": "" + }, + {, + "EventCode": "0x1016E", + "EventName": "PM_MRK_BR_CMPL", + "BriefDescription": "Branch Instruction completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x101E0", + "EventName": "PM_MRK_INST_DISP", + "BriefDescription": "The thread has dispatched a randomly sampled marked instruction", + "PublicDescription": "" + }, + {, + "EventCode": "0x101E2", + "EventName": "PM_MRK_BR_TAKEN_CMPL", + "BriefDescription": "Marked Branch Taken completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C016", + "EventName": "PM_CMPLU_STALL_PASTE", + "BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C01C", + "EventName": "PM_CMPLU_STALL_DMISS_REMOTE", + "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E01E", + "EventName": "PM_CMPLU_STALL_NTC_FLUSH", + "BriefDescription": "Completion stall due to ntc flush", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C128", + "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC", + "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C12E", + "EventName": "PM_MRK_DATA_FROM_LL4_CYC", + "BriefDescription": "Duration in cycles to reload from the local chip's L4 cache due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D024", + "EventName": "PM_RADIX_PWC_L2_HIT", + "BriefDescription": "A radix translation attempt missed in the TLB but hit on both the first and second levels of page walk cache.", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D02A", + "EventName": "PM_RADIX_PWC_L3_PDE_FROM_L2", + "BriefDescription": "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L2 data cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D02E", + "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L2", + "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L2 data cache. This implies that a level 4 PWC access was not necessary for this translation", + "PublicDescription": "" + }, + {, + "EventCode": "0x20130", + "EventName": "PM_MRK_INST_DECODED", + "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only", + "PublicDescription": "" + }, + {, + "EventCode": "0x20138", + "EventName": "PM_MRK_ST_NEST", + "BriefDescription": "Marked store sent to nest", + "PublicDescription": "" + }, + {, + "EventCode": "0x2013A", + "EventName": "PM_MRK_BRU_FIN", + "BriefDescription": "bru marked instr finish", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C044", + "EventName": "PM_DATA_FROM_L3.1_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C048", + "EventName": "PM_DATA_FROM_LMEM", + "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C04A", + "EventName": "PM_DATA_FROM_RL4", + "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x24044", + "EventName": "PM_INST_FROM_L3.1_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x25040", + "EventName": "PM_IPTEG_FROM_L2_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E044", + "EventName": "PM_DPTEG_FROM_L3.1_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E048", + "EventName": "PM_DPTEG_FROM_LMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2F148", + "EventName": "PM_MRK_DPTEG_FROM_LMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x20050", + "EventName": "PM_GRP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C052", + "EventName": "PM_DATA_GRP_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C058", + "EventName": "PM_MEM_PREF", + "BriefDescription": "Memory prefetch for this thread. Includes L4", + "PublicDescription": "" + }, + {, + "EventCode": "0x24156", + "EventName": "PM_MRK_STCX_FIN", + "BriefDescription": "Number of marked stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed", + "PublicDescription": "" + }, + {, + "EventCode": "0x24158", + "EventName": "PM_MRK_INST", + "BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E050", + "EventName": "PM_DARQ0_7_9_ENTRIES", + "BriefDescription": "Cycles in which 7,8, or 9 DARQ entries (out of 12) are in use", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E05E", + "EventName": "PM_LMQ_EMPTY_CYC", + "BriefDescription": "Cycles in which the LMQ has no pending load misses for this thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x200FD", + "EventName": "PM_L1_ICACHE_MISS", + "BriefDescription": "Demand iCache Miss", + "PublicDescription": "" + }, + {, + "EventCode": "0x30006", + "EventName": "PM_CMPLU_STALL_OTHER_CMPL", + "BriefDescription": "Instructions the core completed while this tread was stalled", + "PublicDescription": "" + }, + {, + "EventCode": "0x30008", + "EventName": "PM_DISP_STARVED", + "BriefDescription": "Dispatched Starved", + "PublicDescription": "" + }, + {, + "EventCode": "0x3000A", + "EventName": "PM_CMPLU_STALL_PM", + "BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish. Includes permute and decimal fixed point instructions (128 bit BCD arithmetic) + a few 128 bit fixpoint add/subtract instructions with carry. Not qualified by vector or multicycle", + "PublicDescription": "" + }, + {, + "EventCode": "0x3000E", + "EventName": "PM_FXU_1PLUS_BUSY", + "BriefDescription": "At least one of the 4 FXU units is busy", + "PublicDescription": "" + }, + {, + "EventCode": "0x30028", + "EventName": "PM_CMPLU_STALL_SPEC_FINISH", + "BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC", + "PublicDescription": "" + }, + {, + "EventCode": "0x3012C", + "EventName": "PM_MRK_ST_FWD", + "BriefDescription": "Marked st forwards", + "PublicDescription": "" + }, + {, + "EventCode": "0x30130", + "EventName": "PM_MRK_INST_FIN", + "BriefDescription": "marked instruction finished", + "PublicDescription": "" + }, + {, + "EventCode": "0x3003A", + "EventName": "PM_CMPLU_STALL_EXCEPTION", + "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete", + "PublicDescription": "" + }, + {, + "EventCode": "0x3003C", + "EventName": "PM_CMPLU_STALL_NESTED_TEND", + "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay", + "PublicDescription": "" + }, + {, + "EventCode": "0x3013E", + "EventName": "PM_MRK_STALL_CMPLU_CYC", + "BriefDescription": "Number of cycles the marked instruction is experiencing a stall while it is next to complete (NTC)", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C044", + "EventName": "PM_DATA_FROM_L3.1_ECO_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C04A", + "EventName": "PM_DATA_FROM_RMEM", + "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x34040", + "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_LDHITST", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x34044", + "EventName": "PM_INST_FROM_L3.1_ECO_SHR", + "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x34048", + "EventName": "PM_INST_FROM_DL2L3_SHR", + "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x3404C", + "EventName": "PM_INST_FROM_DL4", + "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x35046", + "EventName": "PM_IPTEG_FROM_L2.1_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x3504E", + "EventName": "PM_DARQ0_4_6_ENTRIES", + "BriefDescription": "Cycles in which 4, 5, or 6 DARQ entries (out of 12) are in use", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E044", + "EventName": "PM_DPTEG_FROM_L3.1_ECO_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F144", + "EventName": "PM_MRK_DPTEG_FROM_L3.1_ECO_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x30050", + "EventName": "PM_SYS_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", + "PublicDescription": "" + }, + {, + "EventCode": "0x30052", + "EventName": "PM_SYS_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C050", + "EventName": "PM_DATA_SYS_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C052", + "EventName": "PM_DATA_SYS_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D05A", + "EventName": "PM_NTC_ISSUE_HELD_OTHER", + "BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D05C", + "EventName": "PM_DISP_HELD_HB_FULL", + "BriefDescription": "Dispatch held due to History Buffer full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E158", + "EventName": "PM_MRK_STCX_FAIL", + "BriefDescription": "marked stcx failed", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F056", + "EventName": "PM_RADIX_PWC_L3_HIT", + "BriefDescription": "A radix translation attempt missed in the TLB but hit on the first, second, and third levels of page walk cache.", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F058", + "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3", + "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F05E", + "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L3", + "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation", + "PublicDescription": "" + }, + {, + "EventCode": "0x30064", + "EventName": "PM_DARQ_STORE_XMIT", + "BriefDescription": "The DARQ attempted to transmit a store into an LSAQ or SRQ entry. Includes rejects. Not qualified by thread, so it includes counts for the whole core", + "PublicDescription": "" + }, + {, + "EventCode": "0x30068", + "EventName": "PM_L1_ICACHE_RELOADED_PREF", + "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x301E4", + "EventName": "PM_MRK_BR_MPRED_CMPL", + "BriefDescription": "Marked Branch Mispredicted", + "PublicDescription": "" + }, + {, + "EventCode": "0x300F2", + "EventName": "PM_INST_DISP", + "BriefDescription": "# PPC Dispatched", + "PublicDescription": "" + }, + {, + "EventCode": "0x300F6", + "EventName": "PM_L1_DCACHE_RELOAD_VALID", + "BriefDescription": "DL1 reloaded due to Demand Load", + "PublicDescription": "" + }, + {, + "EventCode": "0x300FE", + "EventName": "PM_DATA_FROM_L3MISS", + "BriefDescription": "Demand LD - L3 Miss (not L2 hit and not L3 hit)", + "PublicDescription": "" + }, + {, + "EventCode": "0x4000A", + "EventName": "PM_ISQ_36_44_ENTRIES", + "BriefDescription": "Cycles in which 36 or more Issue Queue entries are in use. This is a shared event, not per thread. There are 44 issue queue entries across 4 slices in the whole core", + "PublicDescription": "" + }, + {, + "EventCode": "0x4000C", + "EventName": "PM_FREQ_UP", + "BriefDescription": "Power Management: Above Threshold A", + "PublicDescription": "" + }, + {, + "EventCode": "0x40012", + "EventName": "PM_L1_ICACHE_RELOADED_ALL", + "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D01A", + "EventName": "PM_CMPLU_STALL_EIEIO", + "BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E014", + "EventName": "PM_TM_TX_PASS_RUN_INST", + "BriefDescription": "Run instructions spent in successful transactions", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E018", + "EventName": "PM_CMPLU_STALL_NTC_DISP_FIN", + "BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch.", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C122", + "EventName": "PM_DARQ1_0_3_ENTRIES", + "BriefDescription": "Cycles in which 3 or fewer DARQ1 entries (out of 12) are in use", + "PublicDescription": "" + }, + {, + "EventCode": "0x40134", + "EventName": "PM_MRK_INST_TIMEO", + "BriefDescription": "marked Instruction finish timeout (instruction lost)", + "PublicDescription": "" + }, + {, + "EventCode": "0x4003C", + "EventName": "PM_DISP_HELD_SYNC_HOLD", + "BriefDescription": "Cycles in which dispatch is held because of a synchronizing instruction in the pipeline", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C04A", + "EventName": "PM_DATA_FROM_OFF_CHIP_CACHE", + "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C04E", + "EventName": "PM_DATA_FROM_L3MISS_MOD", + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x44040", + "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_OTHER", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x44048", + "EventName": "PM_INST_FROM_DL2L3_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x4404C", + "EventName": "PM_INST_FROM_DMEM", + "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x4404E", + "EventName": "PM_INST_FROM_L3MISS_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D142", + "EventName": "PM_MRK_DATA_FROM_L3", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D04A", + "EventName": "PM_DARQ0_0_3_ENTRIES", + "BriefDescription": "Cycles in which 3 or less DARQ entries (out of 12) are in use", + "PublicDescription": "" + }, + {, + "EventCode": "0x45042", + "EventName": "PM_IPTEG_FROM_L3", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x45046", + "EventName": "PM_IPTEG_FROM_L2.1_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F144", + "EventName": "PM_MRK_DPTEG_FROM_L3.1_ECO_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F14E", + "EventName": "PM_MRK_DPTEG_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C050", + "EventName": "PM_DATA_SYS_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C052", + "EventName": "PM_DATA_PUMP_MPRED", + "BriefDescription": "Pump misprediction. Counts across all types of pumps for a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4405E", + "EventName": "PM_DARQ_STORE_REJECT", + "BriefDescription": "The DARQ attempted to transmit a store into an LSAQ or SRQ entry but It was rejected. Divide by PM_DARQ_STORE_XMIT to get reject ratio", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D058", + "EventName": "PM_VECTOR_FLOP_CMPL", + "BriefDescription": "Vector FP instruction completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D05A", + "EventName": "PM_NON_MATH_FLOP_CMPL", + "BriefDescription": "Non FLOP operation completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x4505A", + "EventName": "PM_SP_FLOP_CMPL", + "BriefDescription": "SP instruction completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F056", + "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3MISS", + "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from beyond the core's L3 data cache. The source could be local/remote/distant memory or another core's cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F058", + "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L3", + "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F05A", + "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L3", + "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L3 data cache. This is the deepest level of PWC possible for a translation", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F05E", + "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from beyond the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation. The source could be local/remote/distant memory or another core's cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x401E0", + "EventName": "PM_MRK_INST_CMPL", + "BriefDescription": "marked instruction completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x400F4", + "EventName": "PM_RUN_PURR", + "BriefDescription": "Run_PURR", + "PublicDescription": "" + }, + {, + "EventCode": "0x400FC", + "EventName": "PM_ITLB_MISS", + "BriefDescription": "ITLB Reloaded. Counts 1 per ITLB miss for HPT but multiple for radix depending on number of levels traveresed", + "PublicDescription": "" + }, + {, + "EventCode": "0x400FE", + "EventName": "PM_DATA_FROM_MEMORY", + "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load", + "PublicDescription": "" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/memory.json b/tools/perf/pmu-events/arch/powerpc/power9/memory.json new file mode 100644 index 000000000000..e48708c10222 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/memory.json @@ -0,0 +1,158 @@ +[ + {, + "EventCode": "0x10008", + "EventName": "PM_RUN_SPURR", + "BriefDescription": "Run SPURR", + "PublicDescription": "" + }, + {, + "EventCode": "0x1000A", + "EventName": "PM_PMC3_REWIND", + "BriefDescription": "PMC3 rewind event. A rewind happens when a speculative event (such as latency or CPI stack) is selected on PMC3 and the stall reason or reload source did not match the one programmed in PMC3. When this occurs, the count in PMC3 will not change.", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C040", + "EventName": "PM_DATA_FROM_L2_NO_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C050", + "EventName": "PM_DATA_CHIP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D15E", + "EventName": "PM_MRK_RUN_CYC", + "BriefDescription": "Run cycles in which a marked instruction is in the pipeline", + "PublicDescription": "" + }, + {, + "EventCode": "0x15158", + "EventName": "PM_SYNC_MRK_L2HIT", + "BriefDescription": "Marked L2 Hits that can throw a synchronous interrupt", + "PublicDescription": "" + }, + {, + "EventCode": "0x20010", + "EventName": "PM_PMC1_OVERFLOW", + "BriefDescription": "Overflow from counter 1", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C040", + "EventName": "PM_DATA_FROM_L2_MEPF", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2005A", + "EventName": "PM_DARQ1_7_9_ENTRIES", + "BriefDescription": "Cycles in which 7 to 9 DARQ1 entries (out of 12) are in use", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C05C", + "EventName": "PM_INST_GRP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for an instruction fetch (demand only)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D156", + "EventName": "PM_MRK_DTLB_MISS_4K", + "BriefDescription": "Marked Data TLB Miss page size 4k", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E05A", + "EventName": "PM_LRQ_REJECT", + "BriefDescription": "Internal LSU reject from LRQ. Rejects cause the load to go back to LRQ, but it stays contained within the LSU once it gets issued. This event counts the number of times the LRQ attempts to relaunch an instruction after a reject. Any load can suffer multiple rejects", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E05C", + "EventName": "PM_LSU_REJECT_ERAT_MISS", + "BriefDescription": "LSU Reject due to ERAT (up to 4 per cycles)", + "PublicDescription": "" + }, + {, + "EventCode": "0x200F6", + "EventName": "PM_LSU_DERAT_MISS", + "BriefDescription": "DERAT Reloaded due to a DERAT miss", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C048", + "EventName": "PM_DATA_FROM_DL2L3_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3404A", + "EventName": "PM_INST_FROM_RMEM", + "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C058", + "EventName": "PM_LARX_FIN", + "BriefDescription": "Larx finished", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E050", + "EventName": "PM_DARQ1_4_6_ENTRIES", + "BriefDescription": "Cycles in which 4, 5, or 6 DARQ1 entries (out of 12) are in use", + "PublicDescription": "" + }, + {, + "EventCode": "0x3006E", + "EventName": "PM_NEST_REF_CLK", + "BriefDescription": "Multiply by 4 to obtain the number of PB cycles", + "PublicDescription": "" + }, + {, + "EventCode": "0x301E2", + "EventName": "PM_MRK_ST_CMPL", + "BriefDescription": "Marked store completed and sent to nest", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D02C", + "EventName": "PM_PMC1_REWIND", + "BriefDescription": "", + "PublicDescription": "" + }, + {, + "EventCode": "0x4003E", + "EventName": "PM_LD_CMPL", + "BriefDescription": "count of Loads completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C040", + "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C042", + "EventName": "PM_DATA_FROM_L3", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C048", + "EventName": "PM_DATA_FROM_DL2L3_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D056", + "EventName": "PM_NON_FMA_FLOP_CMPL", + "BriefDescription": "Non FMA instruction completed", + "PublicDescription": "" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json new file mode 100644 index 000000000000..396e6e061d91 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json @@ -0,0 +1,836 @@ +[ + {, + "EventCode": "0x1001C", + "EventName": "PM_CMPLU_STALL_THRD", + "BriefDescription": "Completion Stalled because the thread was blocked", + "PublicDescription": "" + }, + {, + "EventCode": "0x1002E", + "EventName": "PM_LMQ_MERGE", + "BriefDescription": "A demand miss collides with a prefetch for the same line", + "PublicDescription": "" + }, + {, + "EventCode": "0x10134", + "EventName": "PM_MRK_ST_DONE_L2", + "BriefDescription": "marked store completed in L2 ( RC machine done)", + "PublicDescription": "" + }, + {, + "EventCode": "0x10138", + "EventName": "PM_MRK_BR_2PATH", + "BriefDescription": "marked branches which are not strongly biased", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C04A", + "EventName": "PM_DATA_FROM_RL2L3_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C04C", + "EventName": "PM_DATA_FROM_LL4", + "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D140", + "EventName": "PM_MRK_DATA_FROM_L3.1_MOD_CYC", + "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L3 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D144", + "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D146", + "EventName": "PM_MRK_DATA_FROM_MEMORY_CYC", + "BriefDescription": "Duration in cycles to reload from a memory location including L4 from local remote or distant due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D148", + "EventName": "PM_MRK_DATA_FROM_RMEM", + "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D14E", + "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC", + "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x15040", + "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x1504C", + "EventName": "PM_IPTEG_FROM_LL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E048", + "EventName": "PM_DPTEG_FROM_ON_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E04E", + "EventName": "PM_DPTEG_FROM_L2MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F146", + "EventName": "PM_MRK_DPTEG_FROM_L3.1_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x10052", + "EventName": "PM_GRP_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C05C", + "EventName": "PM_DTLB_MISS_2M", + "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used", + "PublicDescription": "" + }, + {, + "EventCode": "0x14156", + "EventName": "PM_MRK_DATA_FROM_L2_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L2 due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x14158", + "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L2 without conflict due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1415C", + "EventName": "PM_MRK_DATA_FROM_L3_MEPF_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L3 without dispatch conflicts hit on Mepf state due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D150", + "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D152", + "EventName": "PM_MRK_DATA_FROM_DL4", + "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D156", + "EventName": "PM_MRK_LD_MISS_L1_CYC", + "BriefDescription": "Marked ld latency", + "PublicDescription": "" + }, + {, + "EventCode": "0x15154", + "EventName": "PM_SYNC_MRK_L3MISS", + "BriefDescription": "Marked L3 misses that can throw a synchronous interrupt", + "PublicDescription": "" + }, + {, + "EventCode": "0x1515A", + "EventName": "PM_SYNC_MRK_L2MISS", + "BriefDescription": "Marked L2 Miss that can throw a synchronous interrupt", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E05A", + "EventName": "PM_CMPLU_STALL_ANY_SYNC", + "BriefDescription": "Cycles in which the NTC sync instruction (isync, lwsync or hwsync) is not allowed to complete ", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E05C", + "EventName": "PM_CMPLU_STALL_NESTED_TBEGIN", + "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F152", + "EventName": "PM_MRK_FAB_RSP_BKILL_CYC", + "BriefDescription": "cycles L2 RC took for a bkill", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F056", + "EventName": "PM_RADIX_PWC_L1_HIT", + "BriefDescription": "A radix translation attempt missed in the TLB and only the first level page walk cache was a hit.", + "PublicDescription": "" + }, + {, + "EventCode": "0x101E4", + "EventName": "PM_MRK_L1_ICACHE_MISS", + "BriefDescription": "sampled Instruction suffered an icache Miss", + "PublicDescription": "" + }, + {, + "EventCode": "0x101EA", + "EventName": "PM_MRK_L1_RELOAD_VALID", + "BriefDescription": "Marked demand reload", + "PublicDescription": "" + }, + {, + "EventCode": "0x100FA", + "EventName": "PM_ANY_THRD_RUN_CYC", + "BriefDescription": "Cycles in which at least one thread has the run latch set", + "PublicDescription": "" + }, + {, + "EventCode": "0x100FC", + "EventName": "PM_LD_REF_L1", + "BriefDescription": "All L1 D cache load references counted at finish, gated by reject", + "PublicDescription": "" + }, + {, + "EventCode": "0x20006", + "EventName": "PM_DISP_HELD_ISSQ_FULL", + "BriefDescription": "Dispatch held due to Issue q full. Includes issue queue and branch queue", + "PublicDescription": "" + }, + {, + "EventCode": "0x2000C", + "EventName": "PM_THRD_ALL_RUN_CYC", + "BriefDescription": "Cycles in which all the threads have the run latch set", + "PublicDescription": "" + }, + {, + "EventCode": "0x2001A", + "EventName": "PM_NTC_ALL_FIN", + "BriefDescription": "Cycles after all instructions have finished to group completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D014", + "EventName": "PM_CMPLU_STALL_LRQ_FULL", + "BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ (load-store address queue) because the LRQ (load-reorder queue) was full", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D018", + "EventName": "PM_CMPLU_STALL_EXEC_UNIT", + "BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D01E", + "EventName": "PM_ICT_NOSLOT_DISP_HELD_ISSQ", + "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E014", + "EventName": "PM_STCX_FIN", + "BriefDescription": "Number of stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C120", + "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C122", + "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C126", + "EventName": "PM_MRK_DATA_FROM_L2", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C12A", + "EventName": "PM_MRK_DATA_FROM_RMEM_CYC", + "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group ( Remote) due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C12C", + "EventName": "PM_MRK_DATA_FROM_DL4_CYC", + "BriefDescription": "Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D120", + "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE", + "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D026", + "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L2", + "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L2 data cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x20132", + "EventName": "PM_MRK_DFU_FIN", + "BriefDescription": "Decimal Unit marked Instruction Finish", + "PublicDescription": "" + }, + {, + "EventCode": "0x20134", + "EventName": "PM_MRK_FXU_FIN", + "BriefDescription": "fxu marked instr finish", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C04E", + "EventName": "PM_LD_MISS_L1_FIN", + "BriefDescription": "Number of load instructions that finished with an L1 miss. Note that even if a load spans multiple slices this event will increment only once per load op.", + "PublicDescription": "" + }, + {, + "EventCode": "0x24040", + "EventName": "PM_INST_FROM_L2_MEPF", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x24048", + "EventName": "PM_INST_FROM_LMEM", + "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D142", + "EventName": "PM_MRK_DATA_FROM_L3_MEPF", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D144", + "EventName": "PM_MRK_DATA_FROM_L3.1_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D148", + "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x25048", + "EventName": "PM_IPTEG_FROM_LMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E040", + "EventName": "PM_DPTEG_FROM_L2_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E04A", + "EventName": "PM_DPTEG_FROM_RL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2F14A", + "EventName": "PM_MRK_DPTEG_FROM_RL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x20054", + "EventName": "PM_L1_PREF", + "BriefDescription": "A data line was written to the L1 due to a hardware or software prefetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x20056", + "EventName": "PM_TAKEN_BR_MPRED_CMPL", + "BriefDescription": "Total number of taken branches that were incorrectly predicted as not-taken. This event counts branches completed and does not include speculative instructions", + "PublicDescription": "" + }, + {, + "EventCode": "0x20058", + "EventName": "PM_DARQ1_10_12_ENTRIES", + "BriefDescription": "Cycles in which 10 or more DARQ1 entries (out of 12) are in use", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C050", + "EventName": "PM_DATA_GRP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C05E", + "EventName": "PM_INST_GRP_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for an instruction fetch (demand only)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2505C", + "EventName": "PM_VSU_FIN", + "BriefDescription": "VSU instruction finished. Up to 4 per cycle", + "PublicDescription": "" + }, + {, + "EventCode": "0x2505E", + "EventName": "PM_BACK_BR_CMPL", + "BriefDescription": "Branch instruction completed with a target address less than current instruction address", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E052", + "EventName": "PM_TM_PASSED", + "BriefDescription": "Number of TM transactions that passed", + "PublicDescription": "" + }, + {, + "EventCode": "0x20064", + "EventName": "PM_IERAT_RELOAD_4K", + "BriefDescription": "IERAT reloaded (after a miss) for 4K pages", + "PublicDescription": "" + }, + {, + "EventCode": "0x2006C", + "EventName": "PM_RUN_CYC_SMT4_MODE", + "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT4 mode", + "PublicDescription": "" + }, + {, + "EventCode": "0x201E0", + "EventName": "PM_MRK_DATA_FROM_MEMORY", + "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x201E4", + "EventName": "PM_MRK_DATA_FROM_L3MISS", + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x201E8", + "EventName": "PM_THRESH_EXC_512", + "BriefDescription": "Threshold counter exceeded a value of 512", + "PublicDescription": "" + }, + {, + "EventCode": "0x200F2", + "EventName": "PM_INST_DISP", + "BriefDescription": "# PPC Dispatched", + "PublicDescription": "" + }, + {, + "EventCode": "0x30016", + "EventName": "PM_CMPLU_STALL_SRQ_FULL", + "BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full", + "PublicDescription": "" + }, + {, + "EventCode": "0x30018", + "EventName": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL", + "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)", + "PublicDescription": "" + }, + {, + "EventCode": "0x3001A", + "EventName": "PM_DATA_TABLEWALK_CYC", + "BriefDescription": "Data Tablewalk Cycles. Could be 1 or 2 active tablewalks. Includes data prefetches.", + "PublicDescription": "" + }, + {, + "EventCode": "0x30132", + "EventName": "PM_MRK_VSU_FIN", + "BriefDescription": "VSU marked instr finish", + "PublicDescription": "" + }, + {, + "EventCode": "0x30134", + "EventName": "PM_MRK_ST_CMPL_INT", + "BriefDescription": "marked store finished with intervention", + "PublicDescription": "" + }, + {, + "EventCode": "0x30038", + "EventName": "PM_CMPLU_STALL_DMISS_LMEM", + "BriefDescription": "Completion stall due to cache miss that resolves in local memory", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C040", + "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C042", + "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D140", + "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L2 with dispatch conflict due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D144", + "EventName": "PM_MRK_DATA_FROM_L2_MEPF_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D146", + "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D14C", + "EventName": "PM_MRK_DATA_FROM_DMEM", + "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D14E", + "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x35042", + "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x35048", + "EventName": "PM_IPTEG_FROM_DL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x3504C", + "EventName": "PM_IPTEG_FROM_DL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F146", + "EventName": "PM_MRK_DPTEG_FROM_L2.1_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x3005A", + "EventName": "PM_ISQ_0_8_ENTRIES", + "BriefDescription": "Cycles in which 8 or less Issue Queue entries are in use. This is a shared event, not per thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x3005C", + "EventName": "PM_BFU_BUSY", + "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C05E", + "EventName": "PM_MEM_RWITM", + "BriefDescription": "Memory Read With Intent to Modify for this thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x34054", + "EventName": "PM_PARTIAL_ST_FIN", + "BriefDescription": "Any store finished by an LSU slice", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D15E", + "EventName": "PM_MULT_MRK", + "BriefDescription": "mult marked instr", + "PublicDescription": "" + }, + {, + "EventCode": "0x35152", + "EventName": "PM_MRK_DATA_FROM_L2MISS_CYC", + "BriefDescription": "Duration in cycles to reload from a location other than the local core's L2 due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x35154", + "EventName": "PM_MRK_DATA_FROM_L3_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L3 due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x35156", + "EventName": "PM_MRK_DATA_FROM_L3.1_SHR_CYC", + "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x35158", + "EventName": "PM_MRK_DATA_FROM_L3.1_ECO_MOD_CYC", + "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3515E", + "EventName": "PM_MRK_BACK_BR_CMPL", + "BriefDescription": "Marked branch instruction completed with a target address less than current instruction address", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E05E", + "EventName": "PM_L3_CO_MEPF", + "BriefDescription": "L3 castouts in Mepf state for this thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F150", + "EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC", + "BriefDescription": "cycles to drain st from core to L2", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F054", + "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from beyond the core's L3 data cache. This is the deepest level of PWC possible for a translation. The source could be local/remote/distant memory or another core's cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x30162", + "EventName": "PM_MRK_LSU_DERAT_MISS", + "BriefDescription": "Marked derat reload (miss) for any page size", + "PublicDescription": "" + }, + {, + "EventCode": "0x3006A", + "EventName": "PM_IERAT_RELOAD_64K", + "BriefDescription": "IERAT Reloaded (Miss) for a 64k page", + "PublicDescription": "" + }, + {, + "EventCode": "0x300F8", + "EventName": "PM_TB_BIT_TRANS", + "BriefDescription": "timebase event", + "PublicDescription": "" + }, + {, + "EventCode": "0x40006", + "EventName": "PM_ISLB_MISS", + "BriefDescription": "Number of ISLB misses for this thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x40008", + "EventName": "PM_SRQ_EMPTY_CYC", + "BriefDescription": "Cycles in which the SRQ has at least one (out of four) empty slice", + "PublicDescription": "" + }, + {, + "EventCode": "0x40014", + "EventName": "PM_PROBE_NOP_DISP", + "BriefDescription": "ProbeNops dispatched", + "PublicDescription": "" + }, + {, + "EventCode": "0x4001C", + "EventName": "PM_INST_IMC_MATCH_CMPL", + "BriefDescription": "IMC Match Count", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C01A", + "EventName": "PM_CMPLU_STALL_DMISS_L3MISS", + "BriefDescription": "Completion stall due to cache miss resolving missed the L3", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D012", + "EventName": "PM_PMC3_SAVED", + "BriefDescription": "PMC3 Rewind Value saved", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E11E", + "EventName": "PM_MRK_DATA_FROM_DMEM_CYC", + "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C124", + "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L3 without conflict due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D12E", + "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC", + "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4013A", + "EventName": "PM_MRK_IC_MISS", + "BriefDescription": "Marked instruction experienced I cache miss", + "PublicDescription": "" + }, + {, + "EventCode": "0x44044", + "EventName": "PM_INST_FROM_L3.1_ECO_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x44046", + "EventName": "PM_INST_FROM_L2.1_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x4404A", + "EventName": "PM_INST_FROM_OFF_CHIP_CACHE", + "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D144", + "EventName": "PM_MRK_DATA_FROM_L3.1_ECO_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D146", + "EventName": "PM_MRK_DATA_FROM_L2.1_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4504C", + "EventName": "PM_IPTEG_FROM_DMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E044", + "EventName": "PM_DPTEG_FROM_L3.1_ECO_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E04A", + "EventName": "PM_DPTEG_FROM_OFF_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x40154", + "EventName": "PM_MRK_FAB_RSP_BKILL", + "BriefDescription": "Marked store had to do a bkill", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C054", + "EventName": "PM_DERAT_MISS_16G", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C05A", + "EventName": "PM_DTLB_MISS_1G", + "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used", + "PublicDescription": "" + }, + {, + "EventCode": "0x44054", + "EventName": "PM_VECTOR_LD_CMPL", + "BriefDescription": "Number of vector load instructions completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D05E", + "EventName": "PM_BR_CMPL", + "BriefDescription": "Any Branch instruction completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x45054", + "EventName": "PM_FMA_CMPL", + "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. ", + "PublicDescription": "" + }, + {, + "EventCode": "0x45056", + "EventName": "PM_SCALAR_FLOP_CMPL", + "BriefDescription": "Scalar flop operation completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x4505C", + "EventName": "PM_MATH_FLOP_CMPL", + "BriefDescription": "Math flop instruction completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E05E", + "EventName": "PM_TM_OUTER_TBEGIN_DISP", + "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F054", + "EventName": "PM_RADIX_PWC_MISS", + "BriefDescription": "A radix translation attempt missed in the TLB and all levels of page walk cache.", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F05C", + "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from beyond the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation. The source could be local/remote/distant memory or another core's cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x401E6", + "EventName": "PM_MRK_INST_FROM_L3MISS", + "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet", + "PublicDescription": "" + }, + {, + "EventCode": "0x401E8", + "EventName": "PM_MRK_DATA_FROM_L2MISS", + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x400FA", + "EventName": "PM_RUN_INST_CMPL", + "BriefDescription": "Run_Instructions", + "PublicDescription": "" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json new file mode 100644 index 000000000000..077c3527967f --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json @@ -0,0 +1,680 @@ +[ + {, + "EventCode": "0x1E", + "EventName": "PM_CYC", + "BriefDescription": "Cycles", + "PublicDescription": "" + }, + {, + "EventCode": "0x100F0", + "EventName": "PM_CYC", + "BriefDescription": "Cycles", + "PublicDescription": "" + }, + {, + "EventCode": "0x2", + "EventName": "PM_INST_CMPL", + "BriefDescription": "Number of PowerPC Instructions that completed.", + "PublicDescription": "" + }, + {, + "EventCode": "0x100FE", + "EventName": "PM_INST_CMPL", + "BriefDescription": "# PPC instructions completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x10006", + "EventName": "PM_DISP_HELD", + "BriefDescription": "Dispatch Held", + "PublicDescription": "" + }, + {, + "EventCode": "0x10016", + "EventName": "PM_DSLB_MISS", + "BriefDescription": "Data SLB Miss - Total of all segment sizes", + "PublicDescription": "" + }, + {, + "EventCode": "0x10018", + "EventName": "PM_IC_DEMAND_CYC", + "BriefDescription": "Icache miss demand cycles", + "PublicDescription": "" + }, + {, + "EventCode": "0x10022", + "EventName": "PM_PMC2_SAVED", + "BriefDescription": "PMC2 Rewind Value saved", + "PublicDescription": "" + }, + {, + "EventCode": "0x10024", + "EventName": "PM_PMC5_OVERFLOW", + "BriefDescription": "Overflow from counter 5", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D14A", + "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E040", + "EventName": "PM_DPTEG_FROM_L2_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E04A", + "EventName": "PM_DPTEG_FROM_RL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F140", + "EventName": "PM_MRK_DPTEG_FROM_L2_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F142", + "EventName": "PM_MRK_DPTEG_FROM_L2", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F144", + "EventName": "PM_MRK_DPTEG_FROM_L3_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F148", + "EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x10050", + "EventName": "PM_CHIP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", + "PublicDescription": "" + }, + {, + "EventCode": "0x10054", + "EventName": "PM_PUMP_CPRED", + "BriefDescription": "Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", + "PublicDescription": "" + }, + {, + "EventCode": "0x10056", + "EventName": "PM_MEM_READ", + "BriefDescription": "Reads from Memory from this thread (includes data/inst/xlate/l1prefetch/inst prefetch). Includes L4", + "PublicDescription": "" + }, + {, + "EventCode": "0x1005A", + "EventName": "PM_CMPLU_STALL_DFLONG", + "BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Qualified by multicycle", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C056", + "EventName": "PM_DERAT_MISS_4K", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C05A", + "EventName": "PM_DERAT_MISS_2M", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation", + "PublicDescription": "" + }, + {, + "EventCode": "0x14050", + "EventName": "PM_INST_CHIP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for an instruction fetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x14052", + "EventName": "PM_INST_GRP_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D154", + "EventName": "PM_MRK_DATA_FROM_L2.1_SHR_CYC", + "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x15156", + "EventName": "PM_SYNC_MRK_FX_DIVIDE", + "BriefDescription": "Marked fixed point divide that can cause a synchronous interrupt", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E054", + "EventName": "PM_CMPLU_STALL", + "BriefDescription": "Nothing completed and ICT not empty", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F058", + "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L2", + "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L2 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation", + "PublicDescription": "" + }, + {, + "EventCode": "0x10062", + "EventName": "PM_LD_L3MISS_PEND_CYC", + "BriefDescription": "Cycles L3 miss was pending for this thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x10064", + "EventName": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN", + "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", + "PublicDescription": "" + }, + {, + "EventCode": "0x10068", + "EventName": "PM_BRU_FIN", + "BriefDescription": "Branch Instruction Finished", + "PublicDescription": "" + }, + {, + "EventCode": "0x100F6", + "EventName": "PM_IERAT_RELOAD", + "BriefDescription": "Number of I-ERAT reloads", + "PublicDescription": "" + }, + {, + "EventCode": "0x100F8", + "EventName": "PM_ICT_NOSLOT_CYC", + "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x20008", + "EventName": "PM_ICT_EMPTY_CYC", + "BriefDescription": "Cycles in which the ICT is completely empty. No itags are assigned to any thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x2000A", + "EventName": "PM_HV_CYC", + "BriefDescription": "Cycles in which msr_hv is high. Note that this event does not take msr_pr into consideration", + "PublicDescription": "" + }, + {, + "EventCode": "0x2000E", + "EventName": "PM_FXU_BUSY", + "BriefDescription": "Cycles in which all 4 FXUs are busy. The FXU is running at capacity", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C018", + "EventName": "PM_CMPLU_STALL_DMISS_L21_L31", + "BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D012", + "EventName": "PM_CMPLU_STALL_DFU", + "BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Not qualified by multicycle", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D01A", + "EventName": "PM_ICT_NOSLOT_IC_MISS", + "BriefDescription": "Ict empty for this thread due to Icache Miss", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E012", + "EventName": "PM_TM_TX_PASS_RUN_CYC", + "BriefDescription": "cycles spent in successful transactions", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E016", + "EventName": "PM_NTC_ISSUE_HELD_ARB", + "BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C046", + "EventName": "PM_DATA_FROM_RL2L3_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2404C", + "EventName": "PM_INST_FROM_MEMORY", + "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D14E", + "EventName": "PM_MRK_DATA_FROM_L2.1_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E042", + "EventName": "PM_DPTEG_FROM_L3_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E046", + "EventName": "PM_DPTEG_FROM_RL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2F142", + "EventName": "PM_MRK_DPTEG_FROM_L3_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2F144", + "EventName": "PM_MRK_DPTEG_FROM_L3.1_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2F146", + "EventName": "PM_MRK_DPTEG_FROM_RL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2F14C", + "EventName": "PM_MRK_DPTEG_FROM_MEMORY", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x20052", + "EventName": "PM_GRP_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C054", + "EventName": "PM_DERAT_MISS_64K", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C05A", + "EventName": "PM_DERAT_MISS_1G", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation", + "PublicDescription": "" + }, + {, + "EventCode": "0x24052", + "EventName": "PM_FXU_IDLE", + "BriefDescription": "Cycles in which FXU0, FXU1, FXU2, and FXU3 are all idle", + "PublicDescription": "" + }, + {, + "EventCode": "0x2405A", + "EventName": "PM_NTC_FIN", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. This event is used to account for cycles in which work is being completed in the CPI stack", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D150", + "EventName": "PM_MRK_DERAT_MISS_4K", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 4K", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D152", + "EventName": "PM_MRK_DERAT_MISS_2M", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation", + "PublicDescription": "" + }, + {, + "EventCode": "0x20066", + "EventName": "PM_TLB_MISS", + "BriefDescription": "TLB Miss (I + D)", + "PublicDescription": "" + }, + {, + "EventCode": "0x201E2", + "EventName": "PM_MRK_LD_MISS_L1", + "BriefDescription": "Marked DL1 Demand Miss counted at exec time. Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load.", + "PublicDescription": "" + }, + {, + "EventCode": "0x200F4", + "EventName": "PM_RUN_CYC", + "BriefDescription": "Run_cycles", + "PublicDescription": "" + }, + {, + "EventCode": "0x200F8", + "EventName": "PM_EXT_INT", + "BriefDescription": "external interrupt", + "PublicDescription": "" + }, + {, + "EventCode": "0x30004", + "EventName": "PM_CMPLU_STALL_EMQ_FULL", + "BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full", + "PublicDescription": "" + }, + {, + "EventCode": "0x30020", + "EventName": "PM_PMC2_REWIND", + "BriefDescription": "PMC2 Rewind Event (did not match condition)", + "PublicDescription": "" + }, + {, + "EventCode": "0x30022", + "EventName": "PM_PMC4_SAVED", + "BriefDescription": "PMC4 Rewind Value saved (matched condition)", + "PublicDescription": "" + }, + {, + "EventCode": "0x30024", + "EventName": "PM_PMC6_OVERFLOW", + "BriefDescription": "Overflow from counter 6", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C04C", + "EventName": "PM_DATA_FROM_DL4", + "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D148", + "EventName": "PM_MRK_DATA_FROM_L2.1_MOD_CYC", + "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L2 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E042", + "EventName": "PM_DPTEG_FROM_L3_DISP_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E046", + "EventName": "PM_DPTEG_FROM_L2.1_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F148", + "EventName": "PM_MRK_DPTEG_FROM_DL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F14C", + "EventName": "PM_MRK_DPTEG_FROM_DL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x30056", + "EventName": "PM_TM_ABORTS", + "BriefDescription": "Number of TM transactions aborted", + "PublicDescription": "" + }, + {, + "EventCode": "0x30058", + "EventName": "PM_TLBIE_FIN", + "BriefDescription": "tlbie finished", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C054", + "EventName": "PM_DERAT_MISS_16M", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M", + "PublicDescription": "" + }, + {, + "EventCode": "0x34058", + "EventName": "PM_ICT_NOSLOT_BR_MPRED_ICMISS", + "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred", + "PublicDescription": "" + }, + {, + "EventCode": "0x3405C", + "EventName": "PM_CMPLU_STALL_DPLONG", + "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle", + "PublicDescription": "" + }, + {, + "EventCode": "0x3405E", + "EventName": "PM_IFETCH_THROTTLE", + "BriefDescription": "Cycles in which Instruction fetch throttle was active.", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D152", + "EventName": "PM_MRK_DERAT_MISS_1G", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D154", + "EventName": "PM_MRK_DERAT_MISS_16M", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D058", + "EventName": "PM_VSU_DP_FSQRT_FDIV", + "BriefDescription": "vector versions of fdiv,fsqrt", + "PublicDescription": "" + }, + {, + "EventCode": "0x35150", + "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E052", + "EventName": "PM_ICT_NOSLOT_IC_L3", + "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F05A", + "EventName": "PM_RADIX_PWC_L2_PDE_FROM_L3", + "BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L3 data cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x300FC", + "EventName": "PM_DTLB_MISS", + "BriefDescription": "Data PTEG reload", + "PublicDescription": "" + }, + {, + "EventCode": "0x40004", + "EventName": "PM_FXU_FIN", + "BriefDescription": "The fixed point unit Unit finished an instruction. Instructions that finish may not necessary complete.", + "PublicDescription": "" + }, + {, + "EventCode": "0x40010", + "EventName": "PM_PMC3_OVERFLOW", + "BriefDescription": "Overflow from counter 3", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C012", + "EventName": "PM_CMPLU_STALL_ERAT_MISS", + "BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D01C", + "EventName": "PM_ICT_NOSLOT_DISP_HELD_SYNC", + "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D01E", + "EventName": "PM_ICT_NOSLOT_BR_MPRED", + "BriefDescription": "Ict empty for this thread due to branch mispred", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E010", + "EventName": "PM_ICT_NOSLOT_IC_L3MISS", + "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E01A", + "EventName": "PM_ICT_NOSLOT_DISP_HELD", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C120", + "EventName": "PM_MRK_DATA_FROM_L2_MEPF", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D124", + "EventName": "PM_MRK_DATA_FROM_L3.1_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C04C", + "EventName": "PM_DATA_FROM_DMEM", + "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D140", + "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE", + "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D04C", + "EventName": "PM_DFU_BUSY", + "BriefDescription": "Cycles in which all 4 Decimal Floating Point units are busy. The DFU is running at capacity", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D04E", + "EventName": "PM_VSU_FSQRT_FDIV", + "BriefDescription": "four flops operation (fdiv,fsqrt) Scalar Instructions only", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E046", + "EventName": "PM_DPTEG_FROM_L2.1_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F146", + "EventName": "PM_MRK_DPTEG_FROM_L2.1_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F14A", + "EventName": "PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F14C", + "EventName": "PM_MRK_DPTEG_FROM_DMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x40052", + "EventName": "PM_PUMP_MPRED", + "BriefDescription": "Pump misprediction. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C058", + "EventName": "PM_MEM_CO", + "BriefDescription": "Memory castouts from this thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C15C", + "EventName": "PM_MRK_DERAT_MISS_16G", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D050", + "EventName": "PM_VSU_NON_FLOP_CMPL", + "BriefDescription": "Non FLOP operation completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D052", + "EventName": "PM_2FLOP_CMPL", + "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg ", + "PublicDescription": "" + }, + {, + "EventCode": "0x45058", + "EventName": "PM_IC_MISS_CMPL", + "BriefDescription": "Non-speculative icache miss, counted at completion", + "PublicDescription": "" + }, + {, + "EventCode": "0x40062", + "EventName": "PM_DUMMY1_REMOVE_ME", + "BriefDescription": "Space holder for L2_PC_PM_MK_LDST_SCOPE_PRED_STATUS", + "PublicDescription": "" + }, + {, + "EventCode": "0x40064", + "EventName": "PM_DUMMY2_REMOVE_ME", + "BriefDescription": "Space holder for LS_PC_RELOAD_RA", + "PublicDescription": "" + }, + {, + "EventCode": "0x4006A", + "EventName": "PM_IERAT_RELOAD_16M", + "BriefDescription": "IERAT Reloaded (Miss) for a 16M page", + "PublicDescription": "" + }, + {, + "EventCode": "0x401EC", + "EventName": "PM_THRESH_EXC_2048", + "BriefDescription": "Threshold counter exceeded a value of 2048", + "PublicDescription": "" + }, + {, + "EventCode": "0x400F2", + "EventName": "PM_1PLUS_PPC_DISP", + "BriefDescription": "Cycles at least one Instr Dispatched", + "PublicDescription": "" + }, + {, + "EventCode": "0x400F8", + "EventName": "PM_FLUSH", + "BriefDescription": "Flush (any type)", + "PublicDescription": "" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json new file mode 100644 index 000000000000..32ce71135f77 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json @@ -0,0 +1,146 @@ +[ + {, + "EventCode": "0x0", + "EventName": "PM_SUSPENDED", + "BriefDescription": "Counter OFF", + "PublicDescription": "" + }, + {, + "EventCode": "0x10026", + "EventName": "PM_TABLEWALK_CYC", + "BriefDescription": "Cycles when an instruction tablewalk is active", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E04C", + "EventName": "PM_DPTEG_FROM_LL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F14E", + "EventName": "PM_MRK_DPTEG_FROM_L2MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x10060", + "EventName": "PM_TM_TRANS_RUN_CYC", + "BriefDescription": "run cycles in transactional state", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C012", + "EventName": "PM_CMPLU_STALL_DCACHE_MISS", + "BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E04C", + "EventName": "PM_DPTEG_FROM_MEMORY", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x2C056", + "EventName": "PM_DTLB_MISS_4K", + "BriefDescription": "Data TLB Miss page size 4k", + "PublicDescription": "" + }, + {, + "EventCode": "0x3000C", + "EventName": "PM_FREQ_DOWN", + "BriefDescription": "Power Management: Below Threshold B", + "PublicDescription": "" + }, + {, + "EventCode": "0x3D142", + "EventName": "PM_MRK_DATA_FROM_LMEM", + "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x3F142", + "EventName": "PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x301E8", + "EventName": "PM_THRESH_EXC_64", + "BriefDescription": "Threshold counter exceeded a value of 64", + "PublicDescription": "" + }, + {, + "EventCode": "0x40118", + "EventName": "PM_MRK_DCACHE_RELOAD_INTV", + "BriefDescription": "Combined Intervention event", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C01E", + "EventName": "PM_CMPLU_STALL_CRYPTO", + "BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D018", + "EventName": "PM_CMPLU_STALL_BRU", + "BriefDescription": "Completion stall due to a Branch Unit", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D128", + "EventName": "PM_MRK_DATA_FROM_LMEM_CYC", + "BriefDescription": "Duration in cycles to reload from the local chip's Memory due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E04E", + "EventName": "PM_DPTEG_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F142", + "EventName": "PM_MRK_DPTEG_FROM_L3", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4F148", + "EventName": "PM_MRK_DPTEG_FROM_DL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x40050", + "EventName": "PM_SYS_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", + "PublicDescription": "" + }, + {, + "EventCode": "0x40056", + "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH", + "BriefDescription": "Local memory above threshold for LSU medium", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D054", + "EventName": "PM_8FLOP_CMPL", + "BriefDescription": "8 FLOP instruction completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x45050", + "EventName": "PM_1FLOP_CMPL", + "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x45052", + "EventName": "PM_4FLOP_CMPL", + "BriefDescription": "4 FLOP instruction completed", + "PublicDescription": "" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/translation.json b/tools/perf/pmu-events/arch/powerpc/power9/translation.json new file mode 100644 index 000000000000..d75859836f14 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/translation.json @@ -0,0 +1,272 @@ +[ + {, + "EventCode": "0x10028", + "EventName": "PM_STALL_END_ICT_EMPTY", + "BriefDescription": "The number a times the core transitioned from a stall to ICT-empty for this thread", + "PublicDescription": "" + }, + {, + "EventCode": "0x1C04E", + "EventName": "PM_DATA_FROM_L2MISS_MOD", + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x14044", + "EventName": "PM_INST_FROM_L3_NO_CONFLICT", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x1404E", + "EventName": "PM_INST_FROM_L2MISS", + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x1D142", + "EventName": "PM_MRK_DATA_FROM_L3.1_ECO_SHR_CYC", + "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's ECO L3 on the same chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x15048", + "EventName": "PM_IPTEG_FROM_ON_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x1504A", + "EventName": "PM_IPTEG_FROM_RL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x1E058", + "EventName": "PM_STCX_FAIL", + "BriefDescription": "stcx failed", + "PublicDescription": "" + }, + {, + "EventCode": "0x1F15E", + "EventName": "PM_MRK_PROBE_NOP_CMPL", + "BriefDescription": "Marked probeNops completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x20112", + "EventName": "PM_MRK_NTF_FIN", + "BriefDescription": "Marked next to finish instruction finished", + "PublicDescription": "" + }, + {, + "EventCode": "0x20016", + "EventName": "PM_ST_FIN", + "BriefDescription": "Store finish count. Includes speculative activity", + "PublicDescription": "" + }, + {, + "EventCode": "0x20018", + "EventName": "PM_ST_FWD", + "BriefDescription": "Store forwards that finished", + "PublicDescription": "" + }, + {, + "EventCode": "0x2011C", + "EventName": "PM_MRK_NTC_CYC", + "BriefDescription": "Cycles during which the marked instruction is next to complete (completion is held up because the marked instruction hasn't completed yet)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E018", + "EventName": "PM_CMPLU_STALL_VFXLONG", + "BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2E01C", + "EventName": "PM_CMPLU_STALL_TLBIE", + "BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2", + "PublicDescription": "" + }, + {, + "EventCode": "0x2003E", + "EventName": "PM_LSU_LMQ_SRQ_EMPTY_CYC", + "BriefDescription": "Cycles in which the LSU is empty for all threads (lmq and srq are completely empty)", + "PublicDescription": "" + }, + {, + "EventCode": "0x24042", + "EventName": "PM_INST_FROM_L3_MEPF", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x2D14A", + "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC", + "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", + "PublicDescription": "" + }, + {, + "EventCode": "0x25046", + "EventName": "PM_IPTEG_FROM_RL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x2504A", + "EventName": "PM_IPTEG_FROM_RL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x2504C", + "EventName": "PM_IPTEG_FROM_MEMORY", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x201E6", + "EventName": "PM_THRESH_EXC_32", + "BriefDescription": "Threshold counter exceeded a value of 32", + "PublicDescription": "" + }, + {, + "EventCode": "0x200F0", + "EventName": "PM_ST_CMPL", + "BriefDescription": "Stores completed from S2Q (2nd-level store queue).", + "PublicDescription": "" + }, + {, + "EventCode": "0x200FE", + "EventName": "PM_DATA_FROM_L2MISS", + "BriefDescription": "Demand LD - L2 Miss (not L2 hit)", + "PublicDescription": "" + }, + {, + "EventCode": "0x30010", + "EventName": "PM_PMC2_OVERFLOW", + "BriefDescription": "Overflow from counter 2", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C046", + "EventName": "PM_DATA_FROM_L2.1_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x34042", + "EventName": "PM_INST_FROM_L3_DISP_CONFLICT", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x34046", + "EventName": "PM_INST_FROM_L2.1_SHR", + "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x3504A", + "EventName": "PM_IPTEG_FROM_RMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E048", + "EventName": "PM_DPTEG_FROM_DL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x3E04C", + "EventName": "PM_DPTEG_FROM_DL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C05A", + "EventName": "PM_CMPLU_STALL_VDPLONG", + "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle", + "PublicDescription": "" + }, + {, + "EventCode": "0x3C05C", + "EventName": "PM_CMPLU_STALL_VFXU", + "BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes", + "PublicDescription": "" + }, + {, + "EventCode": "0x30066", + "EventName": "PM_LSU_FIN", + "BriefDescription": "LSU Finished a PPC instruction (up to 4 per cycle)", + "PublicDescription": "" + }, + {, + "EventCode": "0x300F0", + "EventName": "PM_ST_MISS_L1", + "BriefDescription": "Store Missed L1", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D010", + "EventName": "PM_PMC1_SAVED", + "BriefDescription": "PMC1 Rewind Value saved", + "PublicDescription": "" + }, + {, + "EventCode": "0x40132", + "EventName": "PM_MRK_LSU_FIN", + "BriefDescription": "lsu marked instr PPC finish", + "PublicDescription": "" + }, + {, + "EventCode": "0x4C046", + "EventName": "PM_DATA_FROM_L2.1_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a demand load", + "PublicDescription": "" + }, + {, + "EventCode": "0x44042", + "EventName": "PM_INST_FROM_L3", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)", + "PublicDescription": "" + }, + {, + "EventCode": "0x4504A", + "EventName": "PM_IPTEG_FROM_OFF_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a instruction side request", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E048", + "EventName": "PM_DPTEG_FROM_DL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E04C", + "EventName": "PM_DPTEG_FROM_DMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", + "PublicDescription": "" + }, + {, + "EventCode": "0x4405C", + "EventName": "PM_CMPLU_STALL_VDP", + "BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by vector", + "PublicDescription": "" + }, + {, + "EventCode": "0x4D05C", + "EventName": "PM_DP_QP_FLOP_CMPL", + "BriefDescription": "Double-Precion or Quad-Precision instruction completed", + "PublicDescription": "" + }, + {, + "EventCode": "0x4E05C", + "EventName": "PM_LSU_REJECT_LHS", + "BriefDescription": "LSU Reject due to LHS (up to 4 per cycle)", + "PublicDescription": "" + } +] From 80e63ffb09a1b7ac645e9dd1c16b7b08956a7f5b Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 11 Jul 2017 13:00:31 -0500 Subject: [PATCH 030/253] perf vendor events: Add POWER9 PVRs to mapfile Add currently supported POWER9 PVRs to the mapfile Signed-off-by: Sukadev Bhattiprolu Cc: Andi Kleen Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Shriya Link: http://lkml.kernel.org/n/tip-k1pe02sn5gh6nrzp8ditye94@git.kernel.org [ Fix conflict with a87006fd5629 ("perf pmu-events: Support additional POWER8+ PVR in mapfile") ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/powerpc/mapfile.csv | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index f03aec70dfc9..14318ef215f8 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -20,3 +20,6 @@ 004d0100,1,power8.json,core 004d0200,1,power8.json,core 004c0100,1,power8.json,core +004e0100,1,power9.json,core +004e0200,1,power9.json,core +004e1200,1,power9.json,core From 047726d1f9ae6b1f3a2144577ffc3a1a92ffedf4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Jul 2017 17:09:21 -0300 Subject: [PATCH 031/253] tools include uapi x86: Grab a copy of unistd.h In older distros we were not including our copies of unistd_{32,64}.h, as we were relying on the system's asm/unistd.h, and a log time ago the files to be included were asm-{x86_64,i386}/unistd.h. Fix it by also carrying a copy of asm/unistd.h, that will be the same as in modern distros and will allow us to provide missing __NR_setns, for instance, in older distros. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-iwmgm0c4m1ynstktzmkjh8di@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/unistd.h | 17 +++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 18 insertions(+) create mode 100644 tools/arch/x86/include/uapi/asm/unistd.h diff --git a/tools/arch/x86/include/uapi/asm/unistd.h b/tools/arch/x86/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..a26df0d75cd0 --- /dev/null +++ b/tools/arch/x86/include/uapi/asm/unistd.h @@ -0,0 +1,17 @@ +#ifndef _UAPI_ASM_X86_UNISTD_H +#define _UAPI_ASM_X86_UNISTD_H + +/* x32 syscall flag bit */ +#define __X32_SYSCALL_BIT 0x40000000 + +#ifndef __KERNEL__ +# ifdef __i386__ +# include +# elif defined(__ILP32__) +# include +# else +# include +# endif +#endif + +#endif /* _UAPI_ASM_X86_UNISTD_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 83fe2202382e..47abd3325190 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -16,6 +16,7 @@ arch/x86/include/uapi/asm/perf_regs.h arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm_perf.h arch/x86/include/uapi/asm/svm.h +arch/x86/include/uapi/asm/unistd.h arch/x86/include/uapi/asm/vmx.h arch/powerpc/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm.h From 59291f19824200b5a9046b79d37f02fb415335b0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Jul 2017 17:13:40 -0300 Subject: [PATCH 032/253] tools include uapi x86: Add __NR_setns, if missing To help us provide a simple setns() in older distros. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Krister Johansen Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-o10a85kf6j7ig87ep6crab2k@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/unistd_32.h | 3 +++ tools/arch/x86/include/asm/unistd_64.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/tools/arch/x86/include/asm/unistd_32.h b/tools/arch/x86/include/asm/unistd_32.h index 88b3f8c8920c..0e4312ffc945 100644 --- a/tools/arch/x86/include/asm/unistd_32.h +++ b/tools/arch/x86/include/asm/unistd_32.h @@ -10,3 +10,6 @@ #ifndef __NR_getcpu # define __NR_getcpu 318 #endif +#ifndef __NR_setns +# define __NR_setns 346 +#endif diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/asm/unistd_64.h index fbdb70ee8837..dd56bb36132a 100644 --- a/tools/arch/x86/include/asm/unistd_64.h +++ b/tools/arch/x86/include/asm/unistd_64.h @@ -10,3 +10,6 @@ #ifndef __NR_getcpu # define __NR_getcpu 309 #endif +#ifndef __NR_setns +#define __NR_setns 308 +#endif From 86bcdb5a43997bb02ba25a76482c7bfc652ba45b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Jul 2017 17:15:29 -0300 Subject: [PATCH 033/253] tools build: Add test for setns() And provide an alternative implementation to keep perf building on older distros as we're about to add initial support for namespaces. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Krister Johansen Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-bqdwijunhjlvps1ardykhw1i@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 3 ++- tools/build/feature/Makefile | 6 +++++- tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-setns.c | 7 +++++++ tools/perf/Makefile.config | 5 +++++ tools/perf/util/Build | 4 ++++ tools/perf/util/setns.c | 8 ++++++++ tools/perf/util/util.h | 4 ++++ 8 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 tools/build/feature/test-setns.c create mode 100644 tools/perf/util/setns.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 523911f316ce..c71a05b9c984 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -64,7 +64,8 @@ FEATURE_TESTS_BASIC := \ get_cpuid \ bpf \ sched_getcpu \ - sdt + sdt \ + setns # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # of all feature tests diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index e35e4e5ad192..ee2546ddf028 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -49,7 +49,8 @@ FILES= \ test-sdt.bin \ test-cxx.bin \ test-jvmti.bin \ - test-sched_getcpu.bin + test-sched_getcpu.bin \ + test-setns.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -95,6 +96,9 @@ $(OUTPUT)test-glibc.bin: $(OUTPUT)test-sched_getcpu.bin: $(BUILD) +$(OUTPUT)test-setns.bin: + $(BUILD) + DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index cc6c7c01f4ca..b5cfc6445771 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -153,6 +153,10 @@ # include "test-sdt.c" #undef main +#define main main_test_setns +# include "test-setns.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -188,6 +192,7 @@ int main(int argc, char *argv[]) main_test_libcrypto(); main_test_sched_getcpu(); main_test_sdt(); + main_test_setns(); return 0; } diff --git a/tools/build/feature/test-setns.c b/tools/build/feature/test-setns.c new file mode 100644 index 000000000000..1f714d2a658b --- /dev/null +++ b/tools/build/feature/test-setns.c @@ -0,0 +1,7 @@ +#define _GNU_SOURCE +#include + +int main(void) +{ + return setns(0, 0); +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index bdf0e87f9b29..37d203c4cd1f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -330,6 +330,11 @@ ifeq ($(feature-sched_getcpu), 1) CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT endif +ifeq ($(feature-setns), 1) + CFLAGS += -DHAVE_SETNS_SUPPORT + $(call detected,CONFIG_SETNS) +endif + ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 79dea95a7f68..7580fe4d5d30 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -104,6 +104,10 @@ ifndef CONFIG_LIBELF libperf-y += symbol-minimal.o endif +ifndef CONFIG_SETNS +libperf-y += setns.o +endif + libperf-$(CONFIG_DWARF) += probe-finder.o libperf-$(CONFIG_DWARF) += dwarf-aux.o libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/util/setns.c b/tools/perf/util/setns.c new file mode 100644 index 000000000000..ce8fc290fce8 --- /dev/null +++ b/tools/perf/util/setns.c @@ -0,0 +1,8 @@ +#include "util.h" +#include +#include + +int setns(int fd, int nstype) +{ + return syscall(__NR_setns, fd, nstype); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 2c9e58a45310..1e5fe1d9ec32 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -58,4 +58,8 @@ const char *perf_tip(const char *dirpath); int sched_getcpu(void); #endif +#ifndef HAVE_SETNS_SUPPORT +int setns(int fd, int nstype); +#endif + #endif /* GIT_COMPAT_UTIL_H */ From 843ff37bb59edbe51d64e77ba1b3245a15a4dd9f Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 5 Jul 2017 18:48:08 -0700 Subject: [PATCH 034/253] perf symbols: Find symbols in different mount namespace Teach perf how to resolve symbols from binaries that are in a different mount namespace from the tool. This allows perf to generate meaningful stack traces even if the binary resides in a different mount namespace from the tool. Signed-off-by: Krister Johansen Tested-by: Brendan Gregg Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: Thomas-Mich Richter Link: http://lkml.kernel.org/r/1499305693-1599-2-git-send-email-kjlx@templeofstupid.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 1 + tools/perf/util/dso.h | 2 + tools/perf/util/map.c | 2 + tools/perf/util/namespaces.c | 127 +++++++++++++++++++++++++++++++++++ tools/perf/util/namespaces.h | 33 +++++++++ tools/perf/util/symbol.c | 11 +++ tools/perf/util/thread.c | 3 + tools/perf/util/thread.h | 1 + 8 files changed, 180 insertions(+) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 4e7ab611377a..beda40ed63b0 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1236,6 +1236,7 @@ void dso__delete(struct dso *dso) dso_cache__free(dso); dso__free_a2l(dso); zfree(&dso->symsrc_filename); + nsinfo__zput(dso->nsinfo); pthread_mutex_destroy(&dso->lock); free(dso); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index bd061ba7b47c..78ec637fc68b 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -10,6 +10,7 @@ #include #include #include "map.h" +#include "namespaces.h" #include "build-id.h" enum dso_binary_type { @@ -187,6 +188,7 @@ struct dso { void *priv; u64 db_id; }; + struct nsinfo *nsinfo; refcount_t refcnt; char name[0]; }; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 2179b2deb730..5dc60ca5a294 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -16,6 +16,7 @@ #include "machine.h" #include #include "srcline.h" +#include "namespaces.h" #include "unwind.h" static void __maps__insert(struct maps *maps, struct map *map); @@ -200,6 +201,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (type != MAP__FUNCTION) dso__set_loaded(dso, map->type); } + dso->nsinfo = nsinfo__get(thread->nsinfo); dso__put(dso); } return map; diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 67dcbcc73c7d..bcc6bb19cf10 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -9,9 +9,13 @@ #include "namespaces.h" #include "util.h" #include "event.h" +#include +#include +#include #include #include #include +#include struct namespaces *namespaces__new(struct namespaces_event *event) { @@ -35,3 +39,126 @@ void namespaces__free(struct namespaces *namespaces) { free(namespaces); } + +void nsinfo__init(struct nsinfo *nsi) +{ + char oldns[PATH_MAX]; + char *newns = NULL; + struct stat old_stat; + struct stat new_stat; + + if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) + return; + + if (asprintf(&newns, "/proc/%d/ns/mnt", nsi->pid) == -1) + return; + + if (stat(oldns, &old_stat) < 0) + goto out; + + if (stat(newns, &new_stat) < 0) + goto out; + + /* Check if the mount namespaces differ, if so then indicate that we + * want to switch as part of looking up dso/map data. + */ + if (old_stat.st_ino != new_stat.st_ino) { + nsi->need_setns = true; + nsi->mntns_path = newns; + newns = NULL; + } + +out: + free(newns); +} + +struct nsinfo *nsinfo__new(pid_t pid) +{ + struct nsinfo *nsi = calloc(1, sizeof(*nsi)); + + if (nsi != NULL) { + nsi->pid = pid; + nsi->need_setns = false; + nsinfo__init(nsi); + refcount_set(&nsi->refcnt, 1); + } + + return nsi; +} + +void nsinfo__delete(struct nsinfo *nsi) +{ + zfree(&nsi->mntns_path); + free(nsi); +} + +struct nsinfo *nsinfo__get(struct nsinfo *nsi) +{ + if (nsi) + refcount_inc(&nsi->refcnt); + return nsi; +} + +void nsinfo__put(struct nsinfo *nsi) +{ + if (nsi && refcount_dec_and_test(&nsi->refcnt)) + nsinfo__delete(nsi); +} + +void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc) +{ + char curpath[PATH_MAX]; + int oldns = -1; + int newns = -1; + + if (nc == NULL) + return; + + nc->oldns = -1; + nc->newns = -1; + + if (!nsi || !nsi->need_setns) + return; + + if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) + return; + + oldns = open(curpath, O_RDONLY); + if (oldns < 0) + return; + + newns = open(nsi->mntns_path, O_RDONLY); + if (newns < 0) + goto errout; + + if (setns(newns, CLONE_NEWNS) < 0) + goto errout; + + nc->oldns = oldns; + nc->newns = newns; + return; + +errout: + if (oldns > -1) + close(oldns); + if (newns > -1) + close(newns); +} + +void nsinfo__mountns_exit(struct nscookie *nc) +{ + if (nc == NULL || nc->oldns == -1 || nc->newns == -1) + return; + + setns(nc->oldns, CLONE_NEWNS); + + if (nc->oldns > -1) { + close(nc->oldns); + nc->oldns = -1; + } + + if (nc->newns > -1) { + close(nc->newns); + nc->newns = -1; + } +} diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 468f1e9a1484..b20f6ead7b2d 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -11,6 +11,7 @@ #include "../perf.h" #include +#include struct namespaces_event; @@ -23,4 +24,36 @@ struct namespaces { struct namespaces *namespaces__new(struct namespaces_event *event); void namespaces__free(struct namespaces *namespaces); +struct nsinfo { + pid_t pid; + bool need_setns; + char *mntns_path; + refcount_t refcnt; +}; + +struct nscookie { + int oldns; + int newns; +}; + +void nsinfo__init(struct nsinfo *nsi); +struct nsinfo *nsinfo__new(pid_t pid); +void nsinfo__delete(struct nsinfo *nsi); + +struct nsinfo *nsinfo__get(struct nsinfo *nsi); +void nsinfo__put(struct nsinfo *nsi); + +void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc); +void nsinfo__mountns_exit(struct nscookie *nc); + +static inline void __nsinfo__zput(struct nsinfo **nsip) +{ + if (nsip) { + nsinfo__put(*nsip); + *nsip = NULL; + } +} + +#define nsinfo__zput(nsi) __nsinfo__zput(&nsi) + #endif /* __PERF_NAMESPACES_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e7a98dbd2aed..60a9eaa372ef 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -18,6 +18,8 @@ #include "symbol.h" #include "strlist.h" #include "intlist.h" +#include "namespaces.h" +#include "vdso.h" #include "header.h" #include "path.h" #include "sane_ctype.h" @@ -1436,9 +1438,17 @@ int dso__load(struct dso *dso, struct map *map) struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; unsigned char build_id[BUILD_ID_SIZE]; + struct nscookie nsc; + nsinfo__mountns_enter(dso->nsinfo, &nsc); pthread_mutex_lock(&dso->lock); + /* The vdso files always live in the host container, so don't go looking + * for them in the container's mount namespace. + */ + if (dso__is_vdso(dso)) + nsinfo__mountns_exit(&nsc); + /* check again under the dso->lock */ if (dso__loaded(dso, map->type)) { ret = 1; @@ -1584,6 +1594,7 @@ int dso__load(struct dso *dso, struct map *map) out: dso__set_loaded(dso, map->type); pthread_mutex_unlock(&dso->lock); + nsinfo__mountns_exit(&nsc); return ret; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 378c418ca0c1..aee9a42102ba 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -59,6 +59,8 @@ struct thread *thread__new(pid_t pid, pid_t tid) list_add(&comm->list, &thread->comm_list); refcount_set(&thread->refcnt, 1); RB_CLEAR_NODE(&thread->rb_node); + /* Thread holds first ref to nsdata. */ + thread->nsinfo = nsinfo__new(pid); } return thread; @@ -91,6 +93,7 @@ void thread__delete(struct thread *thread) comm__free(comm); } unwind__finish_access(thread); + nsinfo__zput(thread->nsinfo); free(thread); } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 4eb849e9098f..cb1a5dd5c2b9 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -34,6 +34,7 @@ struct thread { void *priv; struct thread_stack *ts; + struct nsinfo *nsinfo; #ifdef HAVE_LIBUNWIND_SUPPORT void *addr_space; struct unwind_libunwind_ops *unwind_libunwind_ops; From bf2e710b3cb8445c052f2ff50b4875a2523bb279 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 5 Jul 2017 18:48:09 -0700 Subject: [PATCH 035/253] perf maps: Lookup maps in both intitial mountns and inner mountns. If a process is in a mountns and has symbols in /tmp/perf-.map, look first in the namespace using the tgid for the pidns that the process might be in. If the map isn't found there, try looking in the mountns where perf is running, and use the tgid that's appropriate for perf's pid namespace. If all else fails, use the original pid. This allows us to locate a symbol map file in the mount namespace, if it was generated there. However, we also try the tool's /tmp in case it's there instead. Signed-off-by: Krister Johansen Tested-by: Brendan Gregg Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: Thomas-Mich Richter Link: http://lkml.kernel.org/r/1499305693-1599-3-git-send-email-kjlx@templeofstupid.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 4 +- tools/perf/util/map.c | 23 ++++++++-- tools/perf/util/map.h | 2 +- tools/perf/util/namespaces.c | 83 +++++++++++++++++++++++++++++++++--- tools/perf/util/namespaces.h | 5 ++- tools/perf/util/symbol.c | 70 ++++++++++++++++++++++++------ 6 files changed, 160 insertions(+), 27 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2e9eb6aa3ce2..246b441110a1 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1392,7 +1392,7 @@ int machine__process_mmap2_event(struct machine *machine, map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, - event->mmap2.pid, event->mmap2.maj, + event->mmap2.maj, event->mmap2.min, event->mmap2.ino, event->mmap2.ino_generation, event->mmap2.prot, @@ -1450,7 +1450,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event map = map__new(machine, event->mmap.start, event->mmap.len, event->mmap.pgoff, - event->mmap.pid, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, event->mmap.filename, type, thread); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 5dc60ca5a294..bdaa0a4edc17 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -146,11 +146,13 @@ void map__init(struct map *map, enum map_type type, } struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino, + u64 pgoff, u32 d_maj, u32 d_min, u64 ino, u64 ino_gen, u32 prot, u32 flags, char *filename, enum map_type type, struct thread *thread) { struct map *map = malloc(sizeof(*map)); + struct nsinfo *nsi = NULL; + struct nsinfo *nnsi; if (map != NULL) { char newfilename[PATH_MAX]; @@ -168,9 +170,11 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, map->ino_generation = ino_gen; map->prot = prot; map->flags = flags; + nsi = nsinfo__get(thread->nsinfo); - if ((anon || no_dso) && type == MAP__FUNCTION) { - snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", pid); + if ((anon || no_dso) && nsi && type == MAP__FUNCTION) { + snprintf(newfilename, sizeof(newfilename), + "/tmp/perf-%d.map", nsi->pid); filename = newfilename; } @@ -180,6 +184,16 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, } if (vdso) { + /* The vdso maps are always on the host and not the + * container. Ensure that we don't use setns to look + * them up. + */ + nnsi = nsinfo__copy(nsi); + if (nnsi) { + nsinfo__put(nsi); + nnsi->need_setns = false; + nsi = nnsi; + } pgoff = 0; dso = machine__findnew_vdso(machine, thread); } else @@ -201,11 +215,12 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (type != MAP__FUNCTION) dso__set_loaded(dso, map->type); } - dso->nsinfo = nsinfo__get(thread->nsinfo); + dso->nsinfo = nsi; dso__put(dso); } return map; out_delete: + nsinfo__put(nsi); free(map); return NULL; } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index f9e8ac8a52cd..73aacf7a7dc4 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -141,7 +141,7 @@ struct thread; void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino, + u64 pgoff, u32 d_maj, u32 d_min, u64 ino, u64 ino_gen, u32 prot, u32 flags, char *filename, enum map_type type, struct thread *thread); struct map *map__new2(u64 start, struct dso *dso, enum map_type type); diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index bcc6bb19cf10..fc5f398779a4 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -40,18 +40,23 @@ void namespaces__free(struct namespaces *namespaces) free(namespaces); } -void nsinfo__init(struct nsinfo *nsi) +int nsinfo__init(struct nsinfo *nsi) { char oldns[PATH_MAX]; + char spath[PATH_MAX]; char *newns = NULL; + char *statln = NULL; struct stat old_stat; struct stat new_stat; + FILE *f = NULL; + size_t linesz = 0; + int rv = -1; if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) - return; + return rv; if (asprintf(&newns, "/proc/%d/ns/mnt", nsi->pid) == -1) - return; + return rv; if (stat(oldns, &old_stat) < 0) goto out; @@ -68,24 +73,89 @@ void nsinfo__init(struct nsinfo *nsi) newns = NULL; } + /* If we're dealing with a process that is in a different PID namespace, + * attempt to work out the innermost tgid for the process. + */ + if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX) + goto out; + + f = fopen(spath, "r"); + if (f == NULL) + goto out; + + while (getline(&statln, &linesz, f) != -1) { + /* Use tgid if CONFIG_PID_NS is not defined. */ + if (strstr(statln, "Tgid:") != NULL) { + nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'), + NULL, 10); + nsi->nstgid = nsi->tgid; + } + + if (strstr(statln, "NStgid:") != NULL) { + nsi->nstgid = (pid_t)strtol(strrchr(statln, '\t'), + NULL, 10); + break; + } + } + rv = 0; + out: + if (f != NULL) + (void) fclose(f); + free(statln); free(newns); + return rv; } struct nsinfo *nsinfo__new(pid_t pid) { - struct nsinfo *nsi = calloc(1, sizeof(*nsi)); + struct nsinfo *nsi; + if (pid == 0) + return NULL; + + nsi = calloc(1, sizeof(*nsi)); if (nsi != NULL) { nsi->pid = pid; + nsi->tgid = pid; + nsi->nstgid = pid; nsi->need_setns = false; - nsinfo__init(nsi); + /* Init may fail if the process exits while we're trying to look + * at its proc information. In that case, save the pid but + * don't try to enter the namespace. + */ + if (nsinfo__init(nsi) == -1) + nsi->need_setns = false; + refcount_set(&nsi->refcnt, 1); } return nsi; } +struct nsinfo *nsinfo__copy(struct nsinfo *nsi) +{ + struct nsinfo *nnsi; + + nnsi = calloc(1, sizeof(*nnsi)); + if (nnsi != NULL) { + nnsi->pid = nsi->pid; + nnsi->tgid = nsi->tgid; + nnsi->nstgid = nsi->nstgid; + nnsi->need_setns = nsi->need_setns; + if (nsi->mntns_path) { + nnsi->mntns_path = strdup(nsi->mntns_path); + if (!nnsi->mntns_path) { + free(nnsi); + return NULL; + } + } + refcount_set(&nnsi->refcnt, 1); + } + + return nnsi; +} + void nsinfo__delete(struct nsinfo *nsi) { zfree(&nsi->mntns_path); @@ -105,7 +175,8 @@ void nsinfo__put(struct nsinfo *nsi) nsinfo__delete(nsi); } -void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc) +void nsinfo__mountns_enter(struct nsinfo *nsi, + struct nscookie *nc) { char curpath[PATH_MAX]; int oldns = -1; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index b20f6ead7b2d..f19aa41119ae 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -26,6 +26,8 @@ void namespaces__free(struct namespaces *namespaces); struct nsinfo { pid_t pid; + pid_t tgid; + pid_t nstgid; bool need_setns; char *mntns_path; refcount_t refcnt; @@ -36,8 +38,9 @@ struct nscookie { int newns; }; -void nsinfo__init(struct nsinfo *nsi); +int nsinfo__init(struct nsinfo *nsi); struct nsinfo *nsinfo__new(pid_t pid); +struct nsinfo *nsinfo__copy(struct nsinfo *nsi); void nsinfo__delete(struct nsinfo *nsi); struct nsinfo *nsinfo__get(struct nsinfo *nsi); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 60a9eaa372ef..21c97cc41cfc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -19,7 +19,6 @@ #include "strlist.h" #include "intlist.h" #include "namespaces.h" -#include "vdso.h" #include "header.h" #include "path.h" #include "sane_ctype.h" @@ -1327,14 +1326,15 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, return __dso__load_kallsyms(dso, filename, map, false); } -static int dso__load_perf_map(struct dso *dso, struct map *map) +static int dso__load_perf_map(const char *map_path, struct dso *dso, + struct map *map) { char *line = NULL; size_t n; FILE *file; int nr_syms = 0; - file = fopen(dso->long_name, "r"); + file = fopen(map_path, "r"); if (file == NULL) goto out_failure; @@ -1426,6 +1426,45 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, } } +/* Checks for the existence of the perf-.map file in two different + * locations. First, if the process is a separate mount namespace, check in + * that namespace using the pid of the innermost pid namespace. If's not in a + * namespace, or the file can't be found there, try in the mount namespace of + * the tracing process using our view of its pid. + */ +static int dso__find_perf_map(char *filebuf, size_t bufsz, + struct nsinfo **nsip) +{ + struct nscookie nsc; + struct nsinfo *nsi; + struct nsinfo *nnsi; + int rc = -1; + + nsi = *nsip; + + if (nsi->need_setns) { + snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsi->nstgid); + nsinfo__mountns_enter(nsi, &nsc); + rc = access(filebuf, R_OK); + nsinfo__mountns_exit(&nsc); + if (rc == 0) + return rc; + } + + nnsi = nsinfo__copy(nsi); + if (nnsi) { + nsinfo__put(nsi); + + nnsi->need_setns = false; + snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nnsi->tgid); + *nsip = nnsi; + rc = 0; + } + + return rc; +} + + int dso__load(struct dso *dso, struct map *map) { char *name; @@ -1437,18 +1476,23 @@ int dso__load(struct dso *dso, struct map *map) struct symsrc ss_[2]; struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; + bool perfmap; unsigned char build_id[BUILD_ID_SIZE]; struct nscookie nsc; + char newmapname[PATH_MAX]; + const char *map_path = dso->long_name; + + perfmap = strncmp(dso->name, "/tmp/perf-", 10) == 0; + if (perfmap) { + if (dso->nsinfo && (dso__find_perf_map(newmapname, + sizeof(newmapname), &dso->nsinfo) == 0)) { + map_path = newmapname; + } + } nsinfo__mountns_enter(dso->nsinfo, &nsc); pthread_mutex_lock(&dso->lock); - /* The vdso files always live in the host container, so don't go looking - * for them in the container's mount namespace. - */ - if (dso__is_vdso(dso)) - nsinfo__mountns_exit(&nsc); - /* check again under the dso->lock */ if (dso__loaded(dso, map->type)) { ret = 1; @@ -1471,19 +1515,19 @@ int dso__load(struct dso *dso, struct map *map) dso->adjust_symbols = 0; - if (strncmp(dso->name, "/tmp/perf-", 10) == 0) { + if (perfmap) { struct stat st; - if (lstat(dso->name, &st) < 0) + if (lstat(map_path, &st) < 0) goto out; if (!symbol_conf.force && st.st_uid && (st.st_uid != geteuid())) { pr_warning("File %s not owned by current user or root, " - "ignoring it (use -f to override).\n", dso->name); + "ignoring it (use -f to override).\n", map_path); goto out; } - ret = dso__load_perf_map(dso, map); + ret = dso__load_perf_map(map_path, dso, map); dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : DSO_BINARY_TYPE__NOT_FOUND; goto out; From 544abd44c7064c8a58a6bd2073d757f6b91d98c5 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 5 Jul 2017 18:48:10 -0700 Subject: [PATCH 036/253] perf probe: Allow placing uprobes in alternate namespaces. Teaches perf how to place a uprobe on a file that's in a different mount namespace. The user must add the probe using the --target-ns argument to perf probe. Once it has been placed, it may be recorded against without further namespace-specific commands. Signed-off-by: Krister Johansen Cc: Alexander Shishkin Cc: Brendan Gregg Cc: Peter Zijlstra Cc: Ravi Bangoria [ PPC build fixed by Ravi: ] Link: http://lkml.kernel.org/r/1500287542-6219-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Cc: Thomas-Mich Richter [ Fix !HAVE_DWARF_SUPPORT build ] Link: http://lkml.kernel.org/r/1499305693-1599-4-git-send-email-kjlx@templeofstupid.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-probe.txt | 9 +++ tools/perf/arch/powerpc/util/sym-handling.c | 2 +- tools/perf/builtin-probe.c | 43 +++++++++-- tools/perf/util/namespaces.c | 13 ++++ tools/perf/util/namespaces.h | 2 + tools/perf/util/probe-event.c | 80 ++++++++++++++------- tools/perf/util/probe-event.h | 10 ++- 7 files changed, 125 insertions(+), 34 deletions(-) diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 165c2b1d4317..a42aabc2b082 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -130,6 +130,11 @@ OPTIONS --max-probes=NUM:: Set the maximum number of probe points for an event. Default is 128. +--target-ns=PID: + Obtain mount namespace information from the target pid. This is + used when creating a uprobe for a process that resides in a + different mount namespace from the perf(1) utility. + -x:: --exec=PATH:: Specify path to the executable or shared library file for user @@ -264,6 +269,10 @@ Add probes at malloc() function on libc ./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc +Add a uprobe to a target process running in a different mount namespace + + ./perf probe --target-ns -x /lib64/libc.so.6 malloc + SEE ALSO -------- linkperf:perf-trace[1], linkperf:perf-record[1], linkperf:perf-buildid-cache[1] diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index bf9a2594572c..9c4e23d8c8ce 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -126,7 +126,7 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev, struct rb_node *tmp; int i = 0; - map = get_target_map(pev->target, pev->uprobes); + map = get_target_map(pev->target, pev->nsi, pev->uprobes); if (!map || map__load(map) < 0) return; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index cf9f9e9c2fc0..3fb98d59cd27 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -58,6 +58,7 @@ static struct { struct line_range line_range; char *target; struct strfilter *filter; + struct nsinfo *nsi; } params; /* Parse an event definition. Note that any error must die. */ @@ -80,6 +81,9 @@ static int parse_probe_event(const char *str) params.target_used = true; } + if (params.nsi) + pev->nsi = nsinfo__get(params.nsi); + /* Parse a perf-probe command into event */ ret = parse_perf_probe_command(str, pev); pr_debug("%d arguments\n", pev->nargs); @@ -189,7 +193,7 @@ static int opt_set_target(const struct option *opt, const char *str, /* Expand given path to absolute path, except for modulename */ if (params.uprobes || strchr(str, '/')) { - tmp = realpath(str, NULL); + tmp = nsinfo__realpath(str, params.nsi); if (!tmp) { pr_warning("Failed to get the absolute path of %s: %m\n", str); return ret; @@ -208,6 +212,34 @@ static int opt_set_target(const struct option *opt, const char *str, return ret; } +static int opt_set_target_ns(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) +{ + int ret = -ENOENT; + pid_t ns_pid; + struct nsinfo *nsip; + + if (str) { + errno = 0; + ns_pid = (pid_t)strtol(str, NULL, 10); + if (errno != 0) { + ret = -errno; + pr_warning("Failed to parse %s as a pid: %s\n", str, + strerror(errno)); + return ret; + } + nsip = nsinfo__new(ns_pid); + if (nsip && nsip->need_setns) + params.nsi = nsinfo__get(nsip); + nsinfo__put(nsip); + + ret = 0; + } + + return ret; +} + + /* Command option callbacks */ #ifdef HAVE_DWARF_SUPPORT @@ -299,6 +331,7 @@ static void cleanup_params(void) line_range__clear(¶ms.line_range); free(params.target); strfilter__delete(params.filter); + nsinfo__put(params.nsi); memset(¶ms, 0, sizeof(params)); } @@ -554,6 +587,8 @@ __cmd_probe(int argc, const char **argv) OPT_BOOLEAN(0, "cache", &probe_conf.cache, "Manipulate probe cache"), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "target-ns", NULL, "pid", + "target pid for namespace contexts", opt_set_target_ns), OPT_END() }; int ret; @@ -634,15 +669,15 @@ __cmd_probe(int argc, const char **argv) pr_err_with_code(" Error: Failed to show event list.", ret); return ret; case 'F': - ret = show_available_funcs(params.target, params.filter, - params.uprobes); + ret = show_available_funcs(params.target, params.nsi, + params.filter, params.uprobes); if (ret < 0) pr_err_with_code(" Error: Failed to show functions.", ret); return ret; #ifdef HAVE_DWARF_SUPPORT case 'L': ret = show_line_range(¶ms.line_range, params.target, - params.uprobes); + params.nsi, params.uprobes); if (ret < 0) pr_err_with_code(" Error: Failed to show lines.", ret); return ret; diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index fc5f398779a4..a58e91197729 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -11,6 +11,7 @@ #include "event.h" #include #include +#include #include #include #include @@ -233,3 +234,15 @@ void nsinfo__mountns_exit(struct nscookie *nc) nc->newns = -1; } } + +char *nsinfo__realpath(const char *path, struct nsinfo *nsi) +{ + char *rpath; + struct nscookie nsc; + + nsinfo__mountns_enter(nsi, &nsc); + rpath = realpath(path, NULL); + nsinfo__mountns_exit(&nsc); + + return rpath; +} diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index f19aa41119ae..05d82601c9a6 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -49,6 +49,8 @@ void nsinfo__put(struct nsinfo *nsi); void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc); void nsinfo__mountns_exit(struct nscookie *nc); +char *nsinfo__realpath(const char *path, struct nsinfo *nsi); + static inline void __nsinfo__zput(struct nsinfo **nsip) { if (nsip) { diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a2670e9d652d..a80895a7e611 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -184,13 +184,19 @@ static struct map *kernel_get_module_map(const char *module) return NULL; } -struct map *get_target_map(const char *target, bool user) +struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user) { /* Init maps of given executable or kernel */ - if (user) - return dso__new_map(target); - else + if (user) { + struct map *map; + + map = dso__new_map(target); + if (map && map->dso) + map->dso->nsinfo = nsinfo__get(nsi); + return map; + } else { return kernel_get_module_map(target); + } } static int convert_exec_to_group(const char *exec, char **result) @@ -366,7 +372,8 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) static int find_alternative_probe_point(struct debuginfo *dinfo, struct perf_probe_point *pp, struct perf_probe_point *result, - const char *target, bool uprobes) + const char *target, struct nsinfo *nsi, + bool uprobes) { struct map *map = NULL; struct symbol *sym; @@ -377,7 +384,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, if (!pp->function || pp->file) return -ENOTSUP; - map = get_target_map(target, uprobes); + map = get_target_map(target, nsi, uprobes); if (!map) return -EINVAL; @@ -421,8 +428,8 @@ static int get_alternative_probe_event(struct debuginfo *dinfo, memcpy(tmp, &pev->point, sizeof(*tmp)); memset(&pev->point, 0, sizeof(pev->point)); - ret = find_alternative_probe_point(dinfo, tmp, &pev->point, - pev->target, pev->uprobes); + ret = find_alternative_probe_point(dinfo, tmp, &pev->point, pev->target, + pev->nsi, pev->uprobes); if (ret < 0) memcpy(&pev->point, tmp, sizeof(*tmp)); @@ -444,7 +451,7 @@ static int get_alternative_line_range(struct debuginfo *dinfo, if (lr->end != INT_MAX) len = lr->end - lr->start; ret = find_alternative_probe_point(dinfo, &pp, &result, - target, user); + target, NULL, user); if (!ret) { lr->function = result.function; lr->file = result.file; @@ -457,12 +464,14 @@ static int get_alternative_line_range(struct debuginfo *dinfo, } /* Open new debuginfo of given module */ -static struct debuginfo *open_debuginfo(const char *module, bool silent) +static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi, + bool silent) { const char *path = module; char reason[STRERR_BUFSIZE]; struct debuginfo *ret = NULL; struct dso *dso = NULL; + struct nscookie nsc; int err; if (!module || !strchr(module, '/')) { @@ -480,6 +489,7 @@ static struct debuginfo *open_debuginfo(const char *module, bool silent) } path = dso->long_name; } + nsinfo__mountns_enter(nsi, &nsc); ret = debuginfo__new(path); if (!ret && !silent) { pr_warning("The %s file has no debug information.\n", path); @@ -489,6 +499,7 @@ static struct debuginfo *open_debuginfo(const char *module, bool silent) pr_warning("Rebuild with -g, "); pr_warning("or install an appropriate debuginfo package.\n"); } + nsinfo__mountns_exit(&nsc); return ret; } @@ -516,7 +527,7 @@ static struct debuginfo *debuginfo_cache__open(const char *module, bool silent) goto out; } - debuginfo_cache = open_debuginfo(module, silent); + debuginfo_cache = open_debuginfo(module, NULL, silent); if (!debuginfo_cache) zfree(&debuginfo_cache_path); out: @@ -531,14 +542,18 @@ static void debuginfo_cache__exit(void) } -static int get_text_start_address(const char *exec, unsigned long *address) +static int get_text_start_address(const char *exec, unsigned long *address, + struct nsinfo *nsi) { Elf *elf; GElf_Ehdr ehdr; GElf_Shdr shdr; int fd, ret = -ENOENT; + struct nscookie nsc; + nsinfo__mountns_enter(nsi, &nsc); fd = open(exec, O_RDONLY); + nsinfo__mountns_exit(&nsc); if (fd < 0) return -errno; @@ -582,7 +597,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, ret = -EINVAL; goto error; } - ret = get_text_start_address(tp->module, &stext); + ret = get_text_start_address(tp->module, &stext, NULL); if (ret < 0) goto error; addr += stext; @@ -659,7 +674,7 @@ post_process_offline_probe_trace_events(struct probe_trace_event *tevs, /* Prepare a map for offline binary */ map = dso__new_map(pathname); - if (!map || get_text_start_address(pathname, &stext) < 0) { + if (!map || get_text_start_address(pathname, &stext, NULL) < 0) { pr_warning("Failed to get ELF symbols for %s\n", pathname); return -EINVAL; } @@ -676,7 +691,8 @@ post_process_offline_probe_trace_events(struct probe_trace_event *tevs, } static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, - int ntevs, const char *exec) + int ntevs, const char *exec, + struct nsinfo *nsi) { int i, ret = 0; unsigned long stext = 0; @@ -684,7 +700,7 @@ static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, if (!exec) return 0; - ret = get_text_start_address(exec, &stext); + ret = get_text_start_address(exec, &stext, nsi); if (ret < 0) return ret; @@ -715,7 +731,7 @@ post_process_module_probe_trace_events(struct probe_trace_event *tevs, if (!module) return 0; - map = get_target_map(module, false); + map = get_target_map(module, NULL, false); if (!map || debuginfo__get_text_offset(dinfo, &text_offs, true) < 0) { pr_warning("Failed to get ELF symbols for %s\n", module); return -EINVAL; @@ -802,7 +818,8 @@ static int post_process_probe_trace_events(struct perf_probe_event *pev, int ret; if (uprobe) - ret = add_exec_to_probe_trace_events(tevs, ntevs, module); + ret = add_exec_to_probe_trace_events(tevs, ntevs, module, + pev->nsi); else if (module) /* Currently ref_reloc_sym based probe is not for drivers */ ret = post_process_module_probe_trace_events(tevs, ntevs, @@ -825,7 +842,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, struct debuginfo *dinfo; int ntevs, ret = 0; - dinfo = open_debuginfo(pev->target, !need_dwarf); + dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf); if (!dinfo) { if (need_dwarf) return -ENOENT; @@ -945,7 +962,7 @@ static int __show_line_range(struct line_range *lr, const char *module, char sbuf[STRERR_BUFSIZE]; /* Search a line range */ - dinfo = open_debuginfo(module, false); + dinfo = open_debuginfo(module, NULL, false); if (!dinfo) return -ENOENT; @@ -1021,14 +1038,18 @@ static int __show_line_range(struct line_range *lr, const char *module, return ret; } -int show_line_range(struct line_range *lr, const char *module, bool user) +int show_line_range(struct line_range *lr, const char *module, + struct nsinfo *nsi, bool user) { int ret; + struct nscookie nsc; ret = init_probe_symbol_maps(user); if (ret < 0) return ret; + nsinfo__mountns_enter(nsi, &nsc); ret = __show_line_range(lr, module, user); + nsinfo__mountns_exit(&nsc); exit_probe_symbol_maps(); return ret; @@ -1111,7 +1132,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, if (ret < 0) return ret; - dinfo = open_debuginfo(pevs->target, false); + dinfo = open_debuginfo(pevs->target, pevs->nsi, false); if (!dinfo) { ret = -ENOENT; goto out; @@ -1155,6 +1176,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, int show_line_range(struct line_range *lr __maybe_unused, const char *module __maybe_unused, + struct nsinfo *nsi __maybe_unused, bool user __maybe_unused) { pr_warning("Debuginfo-analysis is not supported.\n"); @@ -2703,6 +2725,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event *tev = NULL; struct probe_cache *cache = NULL; struct strlist *namelist[2] = {NULL, NULL}; + struct nscookie nsc; up = pev->uprobes ? 1 : 0; fd[up] = __open_probe_file_and_namelist(up, &namelist[up]); @@ -2729,7 +2752,9 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, if (ret < 0) break; + nsinfo__mountns_enter(pev->nsi, &nsc); ret = probe_file__add_event(fd[up], tev); + nsinfo__mountns_exit(&nsc); if (ret < 0) break; @@ -2805,7 +2830,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, int ret, i, j, skipped = 0; char *mod_name; - map = get_target_map(pev->target, pev->uprobes); + map = get_target_map(pev->target, pev->nsi, pev->uprobes); if (!map) { ret = -EINVAL; goto out; @@ -3345,13 +3370,16 @@ int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs) void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs) { int i, j; + struct perf_probe_event *pev; /* Loop 3: cleanup and free trace events */ for (i = 0; i < npevs; i++) { + pev = &pevs[i]; for (j = 0; j < pevs[i].ntevs; j++) clear_probe_trace_event(&pevs[i].tevs[j]); zfree(&pevs[i].tevs); pevs[i].ntevs = 0; + nsinfo__zput(pev->nsi); clear_perf_probe_event(&pevs[i]); } } @@ -3409,8 +3437,8 @@ int del_perf_probe_events(struct strfilter *filter) return ret; } -int show_available_funcs(const char *target, struct strfilter *_filter, - bool user) +int show_available_funcs(const char *target, struct nsinfo *nsi, + struct strfilter *_filter, bool user) { struct rb_node *nd; struct map *map; @@ -3421,7 +3449,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, return ret; /* Get a symbol map */ - map = get_target_map(target, user); + map = get_target_map(target, nsi, user); if (!map) { pr_err("Failed to get a map for %s\n", (target) ? : "kernel"); return -EINVAL; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 5812947418dd..078681d12168 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -4,6 +4,7 @@ #include #include #include "intlist.h" +#include "namespaces.h" /* Probe related configurations */ struct probe_conf { @@ -92,6 +93,7 @@ struct perf_probe_event { struct perf_probe_arg *args; /* Arguments */ struct probe_trace_event *tevs; int ntevs; + struct nsinfo *nsi; /* Target namespace */ }; /* Line range */ @@ -163,10 +165,12 @@ int show_perf_probe_event(const char *group, const char *event, struct perf_probe_event *pev, const char *module, bool use_stdout); int show_perf_probe_events(struct strfilter *filter); -int show_line_range(struct line_range *lr, const char *module, bool user); +int show_line_range(struct line_range *lr, const char *module, + struct nsinfo *nsi, bool user); int show_available_vars(struct perf_probe_event *pevs, int npevs, struct strfilter *filter); -int show_available_funcs(const char *module, struct strfilter *filter, bool user); +int show_available_funcs(const char *module, struct nsinfo *nsi, + struct strfilter *filter, bool user); void arch__fix_tev_from_maps(struct perf_probe_event *pev, struct probe_trace_event *tev, struct map *map, struct symbol *sym); @@ -180,7 +184,7 @@ int e_snprintf(char *str, size_t size, const char *format, ...) __printf(3, 4); int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, struct perf_probe_arg *pvar); -struct map *get_target_map(const char *target, bool user); +struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user); void arch__post_process_probe_trace_events(struct perf_probe_event *pev, int ntevs); From f045b8c4b36baddcfbdd4d3d956446e688b0b3cd Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 5 Jul 2017 18:48:11 -0700 Subject: [PATCH 037/253] perf buildid-cache: Support binary objects from other namespaces Teach buildid-cache how to add, remove, and update binary objects from other mount namespaces. Allow probe events tracing binaries in different namespaces to add their objects to the probe and build-id caches too. As a handy side effect, this also lets us access SDT probes in binaries from alternate mount namespaces. Signed-off-by: Krister Johansen Tested-by: Brendan Gregg Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: Thomas-Mich Richter Link: http://lkml.kernel.org/r/1499305693-1599-5-git-send-email-kjlx@templeofstupid.com [ Add util/namespaces.c to tools/perf/util/python-ext-sources, to fix the python binding 'perf test' ] Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/Documentation/perf-buildid-cache.txt | 5 ++ tools/perf/Documentation/perf-probe.txt | 5 ++ tools/perf/builtin-buildid-cache.c | 52 +++++++++++++------ tools/perf/builtin-probe.c | 2 +- tools/perf/tests/sdt.c | 4 +- tools/perf/util/build-id.c | 47 +++++++++++------ tools/perf/util/build-id.h | 9 ++-- tools/perf/util/dso.c | 12 ++++- tools/perf/util/parse-events.c | 2 +- tools/perf/util/probe-event.c | 6 +-- tools/perf/util/probe-file.c | 19 ++++--- tools/perf/util/probe-file.h | 4 +- tools/perf/util/python-ext-sources | 1 + tools/perf/util/symbol.c | 19 +++++-- tools/perf/util/util.c | 34 +++++++++--- tools/perf/util/util.h | 2 + 16 files changed, 160 insertions(+), 63 deletions(-) diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 058064db39d2..84681007f80f 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -61,6 +61,11 @@ OPTIONS --verbose:: Be more verbose. +--target-ns=PID: + Obtain mount namespace information from the target pid. This is + used when creating a uprobe for a process that resides in a + different mount namespace from the perf(1) utility. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1] diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index a42aabc2b082..d7e4869905f1 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -273,6 +273,11 @@ Add a uprobe to a target process running in a different mount namespace ./perf probe --target-ns -x /lib64/libc.so.6 malloc +Add a USDT probe to a target process running in a different mount namespace + + ./perf probe --target-ns -x /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.121-0.b13.el7_3.x86_64/jre/lib/amd64/server/libjvm.so %sdt_hotspot:thread__sleep__end + + SEE ALSO -------- linkperf:perf-trace[1], linkperf:perf-record[1], linkperf:perf-buildid-cache[1] diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 9eba7f1add1f..d65bd86bee99 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -14,6 +14,7 @@ #include #include "builtin.h" #include "perf.h" +#include "namespaces.h" #include "util/cache.h" #include "util/debug.h" #include "util/header.h" @@ -165,33 +166,41 @@ static int build_id_cache__add_kcore(const char *filename, bool force) return 0; } -static int build_id_cache__add_file(const char *filename) +static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) { char sbuild_id[SBUILD_ID_SIZE]; u8 build_id[BUILD_ID_SIZE]; int err; + struct nscookie nsc; - if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { + nsinfo__mountns_enter(nsi, &nsc); + err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + nsinfo__mountns_exit(&nsc); + if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); return -1; } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__add_s(sbuild_id, filename, + err = build_id_cache__add_s(sbuild_id, filename, nsi, false, false); pr_debug("Adding %s %s: %s\n", sbuild_id, filename, err ? "FAIL" : "Ok"); return err; } -static int build_id_cache__remove_file(const char *filename) +static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[SBUILD_ID_SIZE]; + struct nscookie nsc; int err; - if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { + nsinfo__mountns_enter(nsi, &nsc); + err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + nsinfo__mountns_exit(&nsc); + if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); return -1; } @@ -204,13 +213,13 @@ static int build_id_cache__remove_file(const char *filename) return err; } -static int build_id_cache__purge_path(const char *pathname) +static int build_id_cache__purge_path(const char *pathname, struct nsinfo *nsi) { struct strlist *list; struct str_node *pos; int err; - err = build_id_cache__list_build_ids(pathname, &list); + err = build_id_cache__list_build_ids(pathname, nsi, &list); if (err) goto out; @@ -256,24 +265,30 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f return 0; } -static int build_id_cache__update_file(const char *filename) +static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[SBUILD_ID_SIZE]; + struct nscookie nsc; - int err = 0; + int err; - if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { + nsinfo__mountns_enter(nsi, &nsc); + err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + nsinfo__mountns_exit(&nsc); + if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); return -1; } + err = 0; build_id__sprintf(build_id, sizeof(build_id), sbuild_id); if (build_id_cache__cached(sbuild_id)) err = build_id_cache__remove_s(sbuild_id); if (!err) - err = build_id_cache__add_s(sbuild_id, filename, false, false); + err = build_id_cache__add_s(sbuild_id, filename, nsi, false, + false); pr_debug("Updating %s %s: %s\n", sbuild_id, filename, err ? "FAIL" : "Ok"); @@ -286,6 +301,7 @@ int cmd_buildid_cache(int argc, const char **argv) struct strlist *list; struct str_node *pos; int ret = 0; + int ns_id = -1; bool force = false; char const *add_name_list_str = NULL, *remove_name_list_str = NULL, @@ -299,6 +315,7 @@ int cmd_buildid_cache(int argc, const char **argv) .mode = PERF_DATA_MODE_READ, }; struct perf_session *session = NULL; + struct nsinfo *nsi = NULL; const struct option buildid_cache_options[] = { OPT_STRING('a', "add", &add_name_list_str, @@ -315,6 +332,7 @@ int cmd_buildid_cache(int argc, const char **argv) OPT_STRING('u', "update", &update_name_list_str, "file list", "file(s) to update"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"), OPT_END() }; const char * const buildid_cache_usage[] = { @@ -330,6 +348,9 @@ int cmd_buildid_cache(int argc, const char **argv) !missing_filename && !update_name_list_str)) usage_with_options(buildid_cache_usage, buildid_cache_options); + if (ns_id > 0) + nsi = nsinfo__new(ns_id); + if (missing_filename) { file.path = missing_filename; file.force = force; @@ -348,7 +369,7 @@ int cmd_buildid_cache(int argc, const char **argv) list = strlist__new(add_name_list_str, NULL); if (list) { strlist__for_each_entry(pos, list) - if (build_id_cache__add_file(pos->s)) { + if (build_id_cache__add_file(pos->s, nsi)) { if (errno == EEXIST) { pr_debug("%s already in the cache\n", pos->s); @@ -366,7 +387,7 @@ int cmd_buildid_cache(int argc, const char **argv) list = strlist__new(remove_name_list_str, NULL); if (list) { strlist__for_each_entry(pos, list) - if (build_id_cache__remove_file(pos->s)) { + if (build_id_cache__remove_file(pos->s, nsi)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -384,7 +405,7 @@ int cmd_buildid_cache(int argc, const char **argv) list = strlist__new(purge_name_list_str, NULL); if (list) { strlist__for_each_entry(pos, list) - if (build_id_cache__purge_path(pos->s)) { + if (build_id_cache__purge_path(pos->s, nsi)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -405,7 +426,7 @@ int cmd_buildid_cache(int argc, const char **argv) list = strlist__new(update_name_list_str, NULL); if (list) { strlist__for_each_entry(pos, list) - if (build_id_cache__update_file(pos->s)) { + if (build_id_cache__update_file(pos->s, nsi)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -424,6 +445,7 @@ int cmd_buildid_cache(int argc, const char **argv) out: perf_session__delete(session); + nsinfo__zput(nsi); return ret; } diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 3fb98d59cd27..c0065923a525 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -416,7 +416,7 @@ static int del_perf_probe_caches(struct strfilter *filter) } strlist__for_each_entry(nd, bidlist) { - cache = probe_cache__new(nd->s); + cache = probe_cache__new(nd->s, NULL); if (!cache) continue; if (probe_cache__filter_purge(cache, filter) < 0 || diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 06eda675ae2c..5abe8cea54e6 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -33,7 +33,7 @@ static int build_id_cache__add_file(const char *filename) } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__add_s(sbuild_id, filename, false, false); + err = build_id_cache__add_s(sbuild_id, filename, NULL, false, false); if (err < 0) pr_debug("Failed to add build id cache of %s\n", filename); return err; @@ -54,7 +54,7 @@ static char *get_self_path(void) static int search_cached_probe(const char *target, const char *group, const char *event) { - struct probe_cache *cache = probe_cache__new(target); + struct probe_cache *cache = probe_cache__new(target, NULL); int ret = 0; if (!cache) { diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e0148b081bdf..f7bfd90a7388 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -534,13 +534,14 @@ char *build_id_cache__complement(const char *incomplete_sbuild_id) } char *build_id_cache__cachedir(const char *sbuild_id, const char *name, - bool is_kallsyms, bool is_vdso) + struct nsinfo *nsi, bool is_kallsyms, + bool is_vdso) { char *realname = (char *)name, *filename; bool slash = is_kallsyms || is_vdso; if (!slash) { - realname = realpath(name, NULL); + realname = nsinfo__realpath(name, nsi); if (!realname) return NULL; } @@ -556,13 +557,13 @@ char *build_id_cache__cachedir(const char *sbuild_id, const char *name, return filename; } -int build_id_cache__list_build_ids(const char *pathname, +int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi, struct strlist **result) { char *dir_name; int ret = 0; - dir_name = build_id_cache__cachedir(NULL, pathname, false, false); + dir_name = build_id_cache__cachedir(NULL, pathname, nsi, false, false); if (!dir_name) return -ENOMEM; @@ -576,16 +577,20 @@ int build_id_cache__list_build_ids(const char *pathname, #if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_GELF_GETNOTE_SUPPORT) static int build_id_cache__add_sdt_cache(const char *sbuild_id, - const char *realname) + const char *realname, + struct nsinfo *nsi) { struct probe_cache *cache; int ret; + struct nscookie nsc; - cache = probe_cache__new(sbuild_id); + cache = probe_cache__new(sbuild_id, nsi); if (!cache) return -1; + nsinfo__mountns_enter(nsi, &nsc); ret = probe_cache__scan_sdt(cache, realname); + nsinfo__mountns_exit(&nsc); if (ret >= 0) { pr_debug4("Found %d SDTs in %s\n", ret, realname); if (probe_cache__commit(cache) < 0) @@ -595,11 +600,11 @@ static int build_id_cache__add_sdt_cache(const char *sbuild_id, return ret; } #else -#define build_id_cache__add_sdt_cache(sbuild_id, realname) (0) +#define build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) (0) #endif int build_id_cache__add_s(const char *sbuild_id, const char *name, - bool is_kallsyms, bool is_vdso) + struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) { const size_t size = PATH_MAX; char *realname = NULL, *filename = NULL, *dir_name = NULL, @@ -607,13 +612,16 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, int err = -1; if (!is_kallsyms) { - realname = realpath(name, NULL); + if (!is_vdso) + realname = nsinfo__realpath(name, nsi); + else + realname = realpath(name, NULL); if (!realname) goto out_free; } - dir_name = build_id_cache__cachedir(sbuild_id, name, - is_kallsyms, is_vdso); + dir_name = build_id_cache__cachedir(sbuild_id, name, nsi, is_kallsyms, + is_vdso); if (!dir_name) goto out_free; @@ -634,7 +642,10 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, if (access(filename, F_OK)) { if (is_kallsyms) { - if (copyfile("/proc/kallsyms", filename)) + if (copyfile("/proc/kallsyms", filename)) + goto out_free; + } else if (nsi && nsi->need_setns) { + if (copyfile_ns(name, filename, nsi)) goto out_free; } else if (link(realname, filename) && errno != EEXIST && copyfile(name, filename)) @@ -657,7 +668,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, err = 0; /* Update SDT cache : error is just warned */ - if (realname && build_id_cache__add_sdt_cache(sbuild_id, realname) < 0) + if (realname && + build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) < 0) pr_debug4("Failed to update/scan SDT cache for %s\n", realname); out_free: @@ -670,14 +682,15 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, } static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, - const char *name, bool is_kallsyms, - bool is_vdso) + const char *name, struct nsinfo *nsi, + bool is_kallsyms, bool is_vdso) { char sbuild_id[SBUILD_ID_SIZE]; build_id__sprintf(build_id, build_id_size, sbuild_id); - return build_id_cache__add_s(sbuild_id, name, is_kallsyms, is_vdso); + return build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms, + is_vdso); } bool build_id_cache__cached(const char *sbuild_id) @@ -743,7 +756,7 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) name = nm; } return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, - is_kallsyms, is_vdso); + dso->nsinfo, is_kallsyms, is_vdso); } static int __dsos__cache_build_ids(struct list_head *head, diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 96690a55c62c..23970847d4c4 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -5,6 +5,7 @@ #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) #include "tool.h" +#include "namespaces.h" #include extern struct perf_tool build_id__mark_dso_hit_ops; @@ -31,17 +32,19 @@ int perf_session__cache_build_ids(struct perf_session *session); char *build_id_cache__origname(const char *sbuild_id); char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size); char *build_id_cache__cachedir(const char *sbuild_id, const char *name, - bool is_kallsyms, bool is_vdso); + struct nsinfo *nsi, bool is_kallsyms, + bool is_vdso); struct strlist; struct strlist *build_id_cache__list_all(bool validonly); char *build_id_cache__complement(const char *incomplete_sbuild_id); -int build_id_cache__list_build_ids(const char *pathname, +int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi, struct strlist **result); bool build_id_cache__cached(const char *sbuild_id); int build_id_cache__add_s(const char *sbuild_id, - const char *name, bool is_kallsyms, bool is_vdso); + const char *name, struct nsinfo *nsi, + bool is_kallsyms, bool is_vdso); int build_id_cache__remove_s(const char *sbuild_id); extern char buildid_dir[]; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index beda40ed63b0..dc9b49533a8f 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -504,7 +504,14 @@ static void check_data_close(void); */ static int open_dso(struct dso *dso, struct machine *machine) { - int fd = __open_dso(dso, machine); + int fd; + struct nscookie nsc; + + if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) + nsinfo__mountns_enter(dso->nsinfo, &nsc); + fd = __open_dso(dso, machine); + if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) + nsinfo__mountns_exit(&nsc); if (fd >= 0) { dso__list_add(dso); @@ -1302,6 +1309,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) { bool have_build_id = false; struct dso *pos; + struct nscookie nsc; list_for_each_entry(pos, head, node) { if (with_hits && !pos->hit && !dso__is_vdso(pos)) @@ -1310,11 +1318,13 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) have_build_id = true; continue; } + nsinfo__mountns_enter(pos->nsinfo, &nsc); if (filename__read_build_id(pos->long_name, pos->build_id, sizeof(pos->build_id)) > 0) { have_build_id = true; pos->has_build_id = true; } + nsinfo__mountns_exit(&nsc); } return have_build_id; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 01e779b91c8e..84e301073885 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2124,7 +2124,7 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, return; } strlist__for_each_entry(nd, bidlist) { - pcache = probe_cache__new(nd->s); + pcache = probe_cache__new(nd->s, NULL); if (!pcache) continue; list_for_each_entry(ent, &pcache->entries, node) { diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a80895a7e611..d7cd1142f4c6 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2769,7 +2769,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, if (ret == -EINVAL && pev->uprobes) warn_uprobe_event_compat(tev); if (ret == 0 && probe_conf.cache) { - cache = probe_cache__new(pev->target); + cache = probe_cache__new(pev->target, pev->nsi); if (!cache || probe_cache__add_entry(cache, pev, tevs, ntevs) < 0 || probe_cache__commit(cache) < 0) @@ -3119,7 +3119,7 @@ static int find_cached_events(struct perf_probe_event *pev, int ntevs = 0; int ret = 0; - cache = probe_cache__new(target); + cache = probe_cache__new(target, pev->nsi); /* Return 0 ("not found") if the target has no probe cache. */ if (!cache) return 0; @@ -3209,7 +3209,7 @@ static int find_probe_trace_events_from_cache(struct perf_probe_event *pev, else return find_cached_events(pev, tevs, pev->target); } - cache = probe_cache__new(pev->target); + cache = probe_cache__new(pev->target, pev->nsi); if (!cache) return 0; diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index d679389e627c..cdf8d83a484c 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -412,13 +412,15 @@ int probe_cache_entry__get_event(struct probe_cache_entry *entry, } /* For the kernel probe caches, pass target = NULL or DSO__NAME_KALLSYMS */ -static int probe_cache__open(struct probe_cache *pcache, const char *target) +static int probe_cache__open(struct probe_cache *pcache, const char *target, + struct nsinfo *nsi) { char cpath[PATH_MAX]; char sbuildid[SBUILD_ID_SIZE]; char *dir_name = NULL; bool is_kallsyms = false; int ret, fd; + struct nscookie nsc; if (target && build_id_cache__cached(target)) { /* This is a cached buildid */ @@ -431,8 +433,11 @@ static int probe_cache__open(struct probe_cache *pcache, const char *target) target = DSO__NAME_KALLSYMS; is_kallsyms = true; ret = sysfs__sprintf_build_id("/", sbuildid); - } else + } else { + nsinfo__mountns_enter(nsi, &nsc); ret = filename__sprintf_build_id(target, sbuildid); + nsinfo__mountns_exit(&nsc); + } if (ret < 0) { pr_debug("Failed to get build-id from %s.\n", target); @@ -441,7 +446,7 @@ static int probe_cache__open(struct probe_cache *pcache, const char *target) /* If we have no buildid cache, make it */ if (!build_id_cache__cached(sbuildid)) { - ret = build_id_cache__add_s(sbuildid, target, + ret = build_id_cache__add_s(sbuildid, target, nsi, is_kallsyms, NULL); if (ret < 0) { pr_debug("Failed to add build-id cache: %s\n", target); @@ -449,7 +454,7 @@ static int probe_cache__open(struct probe_cache *pcache, const char *target) } } - dir_name = build_id_cache__cachedir(sbuildid, target, is_kallsyms, + dir_name = build_id_cache__cachedir(sbuildid, target, nsi, is_kallsyms, false); found: if (!dir_name) { @@ -554,7 +559,7 @@ void probe_cache__delete(struct probe_cache *pcache) free(pcache); } -struct probe_cache *probe_cache__new(const char *target) +struct probe_cache *probe_cache__new(const char *target, struct nsinfo *nsi) { struct probe_cache *pcache = probe_cache__alloc(); int ret; @@ -562,7 +567,7 @@ struct probe_cache *probe_cache__new(const char *target) if (!pcache) return NULL; - ret = probe_cache__open(pcache, target); + ret = probe_cache__open(pcache, target, nsi); if (ret < 0) { pr_debug("Cache open error: %d\n", ret); goto out_err; @@ -974,7 +979,7 @@ int probe_cache__show_all_caches(struct strfilter *filter) return -EINVAL; } strlist__for_each_entry(nd, bidlist) { - pcache = probe_cache__new(nd->s); + pcache = probe_cache__new(nd->s, NULL); if (!pcache) continue; if (!list_empty(&pcache->entries)) { diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 5ecc9d3925db..2ca4163abafe 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -51,7 +51,7 @@ int probe_file__del_strlist(int fd, struct strlist *namelist); int probe_cache_entry__get_event(struct probe_cache_entry *entry, struct probe_trace_event **tevs); -struct probe_cache *probe_cache__new(const char *target); +struct probe_cache *probe_cache__new(const char *target, struct nsinfo *nsi); int probe_cache__add_entry(struct probe_cache *pcache, struct perf_probe_event *pev, struct probe_trace_event *tevs, int ntevs); @@ -69,7 +69,7 @@ int probe_cache__show_all_caches(struct strfilter *filter); bool probe_type_is_available(enum probe_type type); bool kretprobe_offset_is_supported(void); #else /* ! HAVE_LIBELF_SUPPORT */ -static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused) +static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused) { return NULL; } diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 9f3b0d9754a8..e66dc495809a 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +util/namespaces.c ../lib/bitmap.c ../lib/find_bit.c ../lib/hweight.c diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 21c97cc41cfc..8c7bae545617 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1464,7 +1464,6 @@ static int dso__find_perf_map(char *filebuf, size_t bufsz, return rc; } - int dso__load(struct dso *dso, struct map *map) { char *name; @@ -1565,6 +1564,8 @@ int dso__load(struct dso *dso, struct map *map) for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) { struct symsrc *ss = &ss_[ss_pos]; bool next_slot = false; + bool is_reg; + int sirc; enum dso_binary_type symtab_type = binary_type_symtab[i]; @@ -1575,12 +1576,20 @@ int dso__load(struct dso *dso, struct map *map) root_dir, name, PATH_MAX)) continue; - if (!is_regular_file(name)) - continue; + if (symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + nsinfo__mountns_exit(&nsc); - /* Name is now the name of the next image to try */ - if (symsrc__init(ss, dso, name, symtab_type) < 0) + is_reg = is_regular_file(name); + sirc = symsrc__init(ss, dso, name, symtab_type); + + if (symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + nsinfo__mountns_enter(dso->nsinfo, &nsc); + + if (!is_reg || sirc < 0) { + if (sirc >= 0) + symsrc__destroy(ss); continue; + } if (!syms_ss && symsrc__has_symtab(ss)) { syms_ss = ss; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 988111e0bab5..9e4ea852f636 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -143,13 +143,17 @@ struct strlist *lsdir(const char *name, return list; } -static int slow_copyfile(const char *from, const char *to) +static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi) { int err = -1; char *line = NULL; size_t n; - FILE *from_fp = fopen(from, "r"), *to_fp; + FILE *from_fp, *to_fp; + struct nscookie nsc; + nsinfo__mountns_enter(nsi, &nsc); + from_fp = fopen(from, "r"); + nsinfo__mountns_exit(&nsc); if (from_fp == NULL) goto out; @@ -198,15 +202,21 @@ int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) return size ? -1 : 0; } -int copyfile_mode(const char *from, const char *to, mode_t mode) +static int copyfile_mode_ns(const char *from, const char *to, mode_t mode, + struct nsinfo *nsi) { int fromfd, tofd; struct stat st; - int err = -1; + int err; char *tmp = NULL, *ptr = NULL; + struct nscookie nsc; - if (stat(from, &st)) + nsinfo__mountns_enter(nsi, &nsc); + err = stat(from, &st); + nsinfo__mountns_exit(&nsc); + if (err) goto out; + err = -1; /* extra 'x' at the end is to reserve space for '.' */ if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) { @@ -227,11 +237,13 @@ int copyfile_mode(const char *from, const char *to, mode_t mode) goto out_close_to; if (st.st_size == 0) { /* /proc? do it slowly... */ - err = slow_copyfile(from, tmp); + err = slow_copyfile(from, tmp, nsi); goto out_close_to; } + nsinfo__mountns_enter(nsi, &nsc); fromfd = open(from, O_RDONLY); + nsinfo__mountns_exit(&nsc); if (fromfd < 0) goto out_close_to; @@ -248,6 +260,16 @@ int copyfile_mode(const char *from, const char *to, mode_t mode) return err; } +int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi) +{ + return copyfile_mode_ns(from, to, 0755, nsi); +} + +int copyfile_mode(const char *from, const char *to, mode_t mode) +{ + return copyfile_mode_ns(from, to, mode, NULL); +} + int copyfile(const char *from, const char *to) { return copyfile_mode(from, to, 0755); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 1e5fe1d9ec32..062dd20437f7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -12,6 +12,7 @@ #include #include #include +#include "namespaces.h" /* General helper functions */ void usage(const char *err) __noreturn; @@ -33,6 +34,7 @@ struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dire bool lsdir_no_dot_filter(const char *name, struct dirent *d); int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); +int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); ssize_t readn(int fd, void *buf, size_t n); From d2396999c998b4e0006aef247e154eff0ed3d8f9 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 5 Jul 2017 18:48:13 -0700 Subject: [PATCH 038/253] perf buildid-cache: Cache debuginfo If a stripped binary is placed in the cache, the user is in a situation where there's a cached elf file present, but it doesn't have any symtab to use for name resolution. Grab the debuginfo for binaries that don't end in .ko. This yields a better chance of resolving symbols from older traces. Signed-off-by: Krister Johansen Cc: Alexander Shishkin Cc: Brendan Gregg Cc: Peter Zijlstra Cc: Thomas-Mich Richter Link: http://lkml.kernel.org/r/1499305693-1599-7-git-send-email-kjlx@templeofstupid.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 2 +- tools/perf/util/annotate.c | 2 +- tools/perf/util/build-id.c | 72 +++++++++++++++++++++++++++--- tools/perf/util/build-id.h | 3 +- tools/perf/util/dso.c | 8 +++- tools/perf/util/dso.h | 1 + tools/perf/util/machine.c | 3 +- tools/perf/util/symbol.c | 12 +++-- 8 files changed, 90 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index d65bd86bee99..e3eb6240ced0 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -243,7 +243,7 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) char filename[PATH_MAX]; u8 build_id[BUILD_ID_SIZE]; - if (dso__build_id_filename(dso, filename, sizeof(filename)) && + if (dso__build_id_filename(dso, filename, sizeof(filename), false) && filename__read_build_id(filename, build_id, sizeof(build_id)) != sizeof(build_id)) { if (errno == ENOENT) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ef434b53d849..1742510f0120 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1347,7 +1347,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil !dso__is_kcore(dso)) return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; - build_id_filename = dso__build_id_filename(dso, NULL, 0); + build_id_filename = dso__build_id_filename(dso, NULL, 0, false); if (build_id_filename) { __symbol__join_symfs(filename, filename_size, build_id_filename); free(build_id_filename); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index f7bfd90a7388..e9665150e9b1 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -243,12 +243,15 @@ static bool build_id_cache__valid_id(char *sbuild_id) return result; } -static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso) +static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso, + bool is_debug) { - return is_kallsyms ? "kallsyms" : (is_vdso ? "vdso" : "elf"); + return is_kallsyms ? "kallsyms" : (is_vdso ? "vdso" : (is_debug ? + "debug" : "elf")); } -char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) +char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, + bool is_debug) { bool is_kallsyms = dso__is_kallsyms((struct dso *)dso); bool is_vdso = dso__is_vdso((struct dso *)dso); @@ -270,7 +273,8 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) ret = asnprintf(&bf, size, "%s", linkname); else ret = asnprintf(&bf, size, "%s/%s", linkname, - build_id_cache__basename(is_kallsyms, is_vdso)); + build_id_cache__basename(is_kallsyms, is_vdso, + is_debug)); if (ret < 0 || (!alloc && size < (unsigned int)ret)) bf = NULL; free(linkname); @@ -603,12 +607,40 @@ static int build_id_cache__add_sdt_cache(const char *sbuild_id, #define build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) (0) #endif +static char *build_id_cache__find_debug(const char *sbuild_id, + struct nsinfo *nsi) +{ + char *realname = NULL; + char *debugfile; + struct nscookie nsc; + size_t len = 0; + + debugfile = calloc(1, PATH_MAX); + if (!debugfile) + goto out; + + len = __symbol__join_symfs(debugfile, PATH_MAX, + "/usr/lib/debug/.build-id/"); + snprintf(debugfile + len, PATH_MAX - len, "%.2s/%s.debug", sbuild_id, + sbuild_id + 2); + + nsinfo__mountns_enter(nsi, &nsc); + realname = realpath(debugfile, NULL); + if (realname && access(realname, R_OK)) + zfree(&realname); + nsinfo__mountns_exit(&nsc); +out: + free(debugfile); + return realname; +} + int build_id_cache__add_s(const char *sbuild_id, const char *name, struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) { const size_t size = PATH_MAX; char *realname = NULL, *filename = NULL, *dir_name = NULL, *linkname = zalloc(size), *tmp; + char *debugfile = NULL; int err = -1; if (!is_kallsyms) { @@ -635,7 +667,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, /* Save the allocated buildid dirname */ if (asprintf(&filename, "%s/%s", dir_name, - build_id_cache__basename(is_kallsyms, is_vdso)) < 0) { + build_id_cache__basename(is_kallsyms, is_vdso, + false)) < 0) { filename = NULL; goto out_free; } @@ -652,6 +685,34 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; } + /* Some binaries are stripped, but have .debug files with their symbol + * table. Check to see if we can locate one of those, since the elf + * file itself may not be very useful to users of our tools without a + * symtab. + */ + if (!is_kallsyms && !is_vdso && + strncmp(".ko", name + strlen(name) - 3, 3)) { + debugfile = build_id_cache__find_debug(sbuild_id, nsi); + if (debugfile) { + zfree(&filename); + if (asprintf(&filename, "%s/%s", dir_name, + build_id_cache__basename(false, false, true)) < 0) { + filename = NULL; + goto out_free; + } + if (access(filename, F_OK)) { + if (nsi && nsi->need_setns) { + if (copyfile_ns(debugfile, filename, + nsi)) + goto out_free; + } else if (link(debugfile, filename) && + errno != EEXIST && + copyfile(debugfile, filename)) + goto out_free; + } + } + } + if (!build_id_cache__linkname(sbuild_id, linkname, size)) goto out_free; tmp = strrchr(linkname, '/'); @@ -676,6 +737,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, if (!is_kallsyms) free(realname); free(filename); + free(debugfile); free(dir_name); free(linkname); return err; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 23970847d4c4..113dc0615c57 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -17,7 +17,8 @@ int filename__sprintf_build_id(const char *pathname, char *sbuild_id); char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, size_t size); -char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); +char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, + bool is_debug); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index dc9b49533a8f..b9e087fb8247 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -32,6 +32,7 @@ char dso__symtab_origin(const struct dso *dso) [DSO_BINARY_TYPE__JAVA_JIT] = 'j', [DSO_BINARY_TYPE__DEBUGLINK] = 'l', [DSO_BINARY_TYPE__BUILD_ID_CACHE] = 'B', + [DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO] = 'D', [DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f', [DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u', [DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO] = 'o', @@ -97,7 +98,12 @@ int dso__read_binary_type_filename(const struct dso *dso, break; } case DSO_BINARY_TYPE__BUILD_ID_CACHE: - if (dso__build_id_filename(dso, filename, size) == NULL) + if (dso__build_id_filename(dso, filename, size, false) == NULL) + ret = -1; + break; + + case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: + if (dso__build_id_filename(dso, filename, size, true) == NULL) ret = -1; break; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 78ec637fc68b..f886141678eb 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -21,6 +21,7 @@ enum dso_binary_type { DSO_BINARY_TYPE__JAVA_JIT, DSO_BINARY_TYPE__DEBUGLINK, DSO_BINARY_TYPE__BUILD_ID_CACHE, + DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO, DSO_BINARY_TYPE__FEDORA_DEBUGINFO, DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, DSO_BINARY_TYPE__BUILDID_DEBUGINFO, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 246b441110a1..a54a2be5eda4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -705,7 +705,8 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) if (kdso->has_build_id) { char filename[PATH_MAX]; - if (dso__build_id_filename(kdso, filename, sizeof(filename))) + if (dso__build_id_filename(kdso, filename, sizeof(filename), + false)) printed += fprintf(fp, "[0] %s\n", filename); } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8c7bae545617..971b990557b4 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -53,6 +53,7 @@ static enum dso_binary_type binary_type_symtab[] = { DSO_BINARY_TYPE__JAVA_JIT, DSO_BINARY_TYPE__DEBUGLINK, DSO_BINARY_TYPE__BUILD_ID_CACHE, + DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO, DSO_BINARY_TYPE__FEDORA_DEBUGINFO, DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, DSO_BINARY_TYPE__BUILDID_DEBUGINFO, @@ -1418,6 +1419,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, return kmod && dso->symtab_type == type; case DSO_BINARY_TYPE__BUILD_ID_CACHE: + case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: return true; case DSO_BINARY_TYPE__NOT_FOUND: @@ -1565,10 +1567,14 @@ int dso__load(struct dso *dso, struct map *map) struct symsrc *ss = &ss_[ss_pos]; bool next_slot = false; bool is_reg; + bool nsexit; int sirc; enum dso_binary_type symtab_type = binary_type_symtab[i]; + nsexit = (symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE || + symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO); + if (!dso__is_compatible_symtab_type(dso, kmod, symtab_type)) continue; @@ -1576,13 +1582,13 @@ int dso__load(struct dso *dso, struct map *map) root_dir, name, PATH_MAX)) continue; - if (symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + if (nsexit) nsinfo__mountns_exit(&nsc); is_reg = is_regular_file(name); sirc = symsrc__init(ss, dso, name, symtab_type); - if (symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + if (nsexit) nsinfo__mountns_enter(dso->nsinfo, &nsc); if (!is_reg || sirc < 0) { @@ -1724,7 +1730,7 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map) } if (!symbol_conf.ignore_vmlinux_buildid) - filename = dso__build_id_filename(dso, NULL, 0); + filename = dso__build_id_filename(dso, NULL, 0, false); if (filename != NULL) { err = dso__load_vmlinux(dso, map, filename, true); if (err > 0) From 30269dc1a17df5117a2e267835863d4466c3569d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Jul 2017 13:05:43 -0300 Subject: [PATCH 039/253] perf evsel: Allow asking for max precise_ip in new_cycles() There are cases where we want to leave attr.precise_ip as zero, such as when using 'perf record --no-samples', where this would make the kernel return -EINVAL. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-4zq1udecxa51gsapyfwej5fj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- tools/perf/util/evsel.c | 7 +++++-- tools/perf/util/evsel.h | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 46c0faf6c502..f51c3314015c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -244,7 +244,7 @@ void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) int perf_evlist__add_default(struct perf_evlist *evlist) { - struct perf_evsel *evsel = perf_evsel__new_cycles(); + struct perf_evsel *evsel = perf_evsel__new_cycles(true); if (evsel == NULL) return -ENOMEM; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0e4cd6092564..df567e478a0e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -268,7 +268,7 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) return evsel; } -struct perf_evsel *perf_evsel__new_cycles(void) +struct perf_evsel *perf_evsel__new_cycles(bool precise) { struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, @@ -278,6 +278,9 @@ struct perf_evsel *perf_evsel__new_cycles(void) struct perf_evsel *evsel; event_attr_init(&attr); + + if (!precise) + goto new_event; /* * Unnamed union member, not supported as struct member named * initializer in older compilers such as gcc 4.4.7 @@ -292,7 +295,7 @@ struct perf_evsel *perf_evsel__new_cycles(void) * to kick in when we return and before perf_evsel__open() is called. */ attr.sample_period = 0; - +new_event: evsel = perf_evsel__new(&attr); if (evsel == NULL) goto out; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 219ad0cdb9f4..fb40ca3c6519 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -185,7 +185,7 @@ static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char * return perf_evsel__newtp_idx(sys, name, 0); } -struct perf_evsel *perf_evsel__new_cycles(void); +struct perf_evsel *perf_evsel__new_cycles(bool precise); struct event_format *event_format__new(const char *sys, const char *name); From db918acb356b8770a0fafbe96743031a56b91af6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Jul 2017 13:10:20 -0300 Subject: [PATCH 040/253] perf evlist: Allow asking for max precise_ip in add_default() There are cases where we want to leave attr.precise_ip as zero, such as when using 'perf record --no-samples', where this would make the kernel return -EINVAL. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-0u2m2a8rqw781r6m8svqyne8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 4 ++-- tools/perf/util/evlist.h | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f51c3314015c..078b58511595 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -242,9 +242,9 @@ void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) } } -int perf_evlist__add_default(struct perf_evlist *evlist) +int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise) { - struct perf_evsel *evsel = perf_evsel__new_cycles(true); + struct perf_evsel *evsel = perf_evsel__new_cycles(precise); if (evsel == NULL) return -ENOMEM; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8d601fbdd8d6..0843746bc389 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -115,7 +115,14 @@ void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel); -int perf_evlist__add_default(struct perf_evlist *evlist); + +int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise); + +static inline int perf_evlist__add_default(struct perf_evlist *evlist) +{ + return __perf_evlist__add_default(evlist, true); +} + int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); From 4b4cd50319127b3b606b8aaf3426d8972df1666a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Jul 2017 13:26:32 -0300 Subject: [PATCH 041/253] perf record: Do not ask for precise_ip with --no-samples When the user doesn't specify an event with -e/--event, 'perf record' will use as a default the "cycles" event with the highest level of precision in perf_event_attr.precise_ip, but --no-samples, if present, is incompatible with precise_ip != 0, so use the newly introduced __perf_event__add_default(precise = false) to fix that: Before: # perf record -n usleep 1 Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cycles:ppp). /bin/dmesg may provide additional information. No CONFIG_PERF_EVENTS=y kernel support configured? # After: # perf record -n usleep 1 Please consider tweaking /proc/sys/kernel/perf_event_max_sample_rate. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.018 MB perf.data ] [root@jouet /]# perf evlist -v cycles: size: 112, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 [root@jouet /]# Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-q991fw6s6rhjvrd5ye4t7qom@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 17a14bcce34a..64eef9a567d9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1821,7 +1821,7 @@ int cmd_record(int argc, const char **argv) record.opts.tail_synthesize = true; if (rec->evlist->nr_entries == 0 && - perf_evlist__add_default(rec->evlist) < 0) { + __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { pr_err("Not enough memory for event selector list\n"); goto out; } From 8526bafc149ee4d0df5eabca0f440164ec705c99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 1 Jul 2017 11:21:59 -0300 Subject: [PATCH 042/253] perf test sdt: Handle realpath() failure It can return NULL, in which case we should bail out and remove the directory created with mkdtemp(), which is stored in the "__tempdir" variable, not in "tempdir". Cc: Masami Hiramatsu Fixes: 8e5dc848356e ("perf test: Add a test case for SDT event") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sdt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 5abe8cea54e6..d80171526f6f 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -83,6 +83,8 @@ int test__sdt_event(int subtests __maybe_unused) } /* Note that buildid_dir must be an absolute path */ tempdir = realpath(__tempdir, NULL); + if (tempdir == NULL) + goto error_rmdir; /* At first, scan itself */ set_buildid_dir(tempdir); @@ -100,7 +102,7 @@ int test__sdt_event(int subtests __maybe_unused) error_rmdir: /* Cleanup temporary buildid dir */ - rm_rf(tempdir); + rm_rf(__tempdir); error: free(tempdir); free(myself); From d78ada4a767a744cad5efa210b8acf57748b91d7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:17 +0200 Subject: [PATCH 043/253] perf tests attr: Do not store failed events Do not mess up our temp space with files we don't need - failed event open attempts. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 0e77b2cf61ec..08b1d5ce9020 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -136,7 +136,7 @@ void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, { int errno_saved = errno; - if (store_event(attr, pid, cpu, fd, group_fd, flags)) { + if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) { pr_err("test attr FAILED"); exit(128); } From 10213e2ff2e0453324689eb50e73df3df1ee6163 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:18 +0200 Subject: [PATCH 044/253] perf tests attr: Add test_attr__ready function We create many test events before the real ones just to test specific features. But there's no way for attr tests to separate those test events from those it needs to check. Adding 'ready' call from the events open interface to trigger/start events collection for attr test. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 1 + tools/perf/tests/attr.c | 10 ++++++++++ tools/perf/util/evsel.c | 4 ++++ 3 files changed, 15 insertions(+) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 806c216a1078..2c010dd6a79d 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -7,6 +7,7 @@ #include extern bool test_attr__enabled; +void test_attr__ready(void); void test_attr__init(void); void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, int fd, int group_fd, unsigned long flags); diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 08b1d5ce9020..84c0eb598a67 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -36,6 +36,7 @@ #define ENV "PERF_TEST_ATTR" static char *dir; +static bool ready; void test_attr__init(void) { @@ -67,6 +68,9 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, FILE *file; char path[PATH_MAX]; + if (!ready) + return 0; + snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, attr->type, attr->config, fd); @@ -144,6 +148,12 @@ void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, errno = errno_saved; } +void test_attr__ready(void) +{ + if (unlikely(test_attr__enabled) && !ready) + ready = true; +} + static int run_dir(const char *d, const char *perf) { char v[] = "-vvvvv"; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index df567e478a0e..6dd069a41ac3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -58,6 +58,8 @@ static int perf_evsel__no_extra_init(struct perf_evsel *evsel __maybe_unused) return 0; } +void __weak test_attr__ready(void) { } + static void perf_evsel__no_extra_fini(struct perf_evsel *evsel __maybe_unused) { } @@ -1572,6 +1574,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, cpus->map[cpu], group_fd, flags); + test_attr__ready(); + fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], group_fd, flags); From 04c31bcf86c4fa51022ec254827f1c7c41306167 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:19 +0200 Subject: [PATCH 045/253] perf tests attr: Make compare_data global Making compare_data global, so it could be used outside the Test class scope to compare command results. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index cdf21a9d0c35..bb671cd1d66a 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -9,6 +9,20 @@ import logging import shutil import ConfigParser +def compare_data(a, b): + # Allow multiple values in assignment separated by '|' + a_list = a.split('|') + b_list = b.split('|') + + for a_item in a_list: + for b_item in b_list: + if (a_item == b_item): + return True + elif (a_item == '*') or (b_item == '*'): + return True + + return False + class Fail(Exception): def __init__(self, test, msg): self.msg = msg @@ -82,26 +96,12 @@ class Event(dict): self.add(base) self.add(data) - def compare_data(self, a, b): - # Allow multiple values in assignment separated by '|' - a_list = a.split('|') - b_list = b.split('|') - - for a_item in a_list: - for b_item in b_list: - if (a_item == b_item): - return True - elif (a_item == '*') or (b_item == '*'): - return True - - return False - def equal(self, other): for t in Event.terms: log.debug(" [%s] %s %s" % (t, self[t], other[t])); if not self.has_key(t) or not other.has_key(t): return False - if not self.compare_data(self[t], other[t]): + if not compare_data(self[t], other[t]): return False return True @@ -109,7 +109,7 @@ class Event(dict): for t in Event.terms: if not self.has_key(t) or not other.has_key(t): continue - if not self.compare_data(self[t], other[t]): + if not compare_data(self[t], other[t]): log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) # Test file description needs to have following sections: @@ -218,9 +218,9 @@ class Test(object): self.perf, self.command, tempdir, self.args) ret = os.WEXITSTATUS(os.system(cmd)) - log.info(" '%s' ret %d " % (cmd, ret)) + log.info(" '%s' ret '%s', expected '%s'" % (cmd, str(ret), str(self.ret))) - if ret != int(self.ret): + if not compare_data(str(ret), str(self.ret)): raise Unsup(self) def compare(self, expect, result): From dde622a5063c71a1864a41dfaed4baaabfa84ba6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:20 +0200 Subject: [PATCH 046/253] perf tests attr: Rename compare_data to data_equal The data_equal name fits better to the return value of the function. It's true when the data is equal. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index bb671cd1d66a..b03261c6b1ed 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -9,7 +9,7 @@ import logging import shutil import ConfigParser -def compare_data(a, b): +def data_equal(a, b): # Allow multiple values in assignment separated by '|' a_list = a.split('|') b_list = b.split('|') @@ -101,7 +101,7 @@ class Event(dict): log.debug(" [%s] %s %s" % (t, self[t], other[t])); if not self.has_key(t) or not other.has_key(t): return False - if not compare_data(self[t], other[t]): + if not data_equal(self[t], other[t]): return False return True @@ -109,7 +109,7 @@ class Event(dict): for t in Event.terms: if not self.has_key(t) or not other.has_key(t): continue - if not compare_data(self[t], other[t]): + if not data_equal(self[t], other[t]): log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) # Test file description needs to have following sections: @@ -220,7 +220,7 @@ class Test(object): log.info(" '%s' ret '%s', expected '%s'" % (cmd, str(ret), str(self.ret))) - if not compare_data(str(ret), str(self.ret)): + if not data_equal(str(ret), str(self.ret)): raise Unsup(self) def compare(self, expect, result): From 5ced95b2374b0fb3c1d0367c6e8cf82d41292990 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:21 +0200 Subject: [PATCH 047/253] perf tests attr: Add 1s for exclude_kernel and task base bits There's an event open fallback which set exclude_kernel=1 in case use does not have enough privileges. Adding both 0|1 for this attribute, because we don't know what value it is. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/base-record | 2 +- tools/perf/tests/attr/base-stat | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 7e6d74946e04..cdf3adf7d389 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -15,7 +15,7 @@ inherit=1 pinned=0 exclusive=0 exclude_user=0 -exclude_kernel=0 +exclude_kernel=0|1 exclude_hv=0 exclude_idle=0 mmap=1 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index f4cf148f14cb..ff9b33d4ae9e 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -15,7 +15,7 @@ inherit=1 pinned=0 exclusive=0 exclude_user=0 -exclude_kernel=0 +exclude_kernel=0|1 exclude_hv=0 exclude_idle=0 mmap=0 From d9115e924088b714bb801713751bf7922d799f95 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:22 +0200 Subject: [PATCH 048/253] perf tests attr: Fix record dwarf test Following commit: commit 5c0cf22477ea ("perf record: Store data mmaps for dwarf unwind") have enabled address sampling for dwarf unwind, we need to reflect that in this test by adding ADDR sample_type and enabling mmap_data. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/test-record-graph-dwarf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/attr/test-record-graph-dwarf index d6f324ea578c..8321c602dc38 100644 --- a/tools/perf/tests/attr/test-record-graph-dwarf +++ b/tools/perf/tests/attr/test-record-graph-dwarf @@ -3,8 +3,9 @@ command = record args = --call-graph dwarf -- kill >/dev/null 2>&1 [event:base-record] -sample_type=12583 +sample_type=45359 exclude_callchain_user=1 sample_stack_user=8192 # TODO different for each arch, no support for that now sample_regs_user=* +mmap_data=1 From 44fed277f81ba22e3f2fbcf1501c3b14aeb9f8e4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:23 +0200 Subject: [PATCH 049/253] perf tests attr: Fix no-delay test Following commit: commit 509051ea8427 ("perf record: Rename --no-delay to --no-buffering") removed '-D' option and renamed --no-delay into --no-buffering. Fixing that in the attr tests. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Fixes: 509051ea8427 ("perf record: Rename --no-delay to --no-buffering") Link: http://lkml.kernel.org/r/20170703145030.12903-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../attr/{test-record-no-delay => test-record-no-buffering} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename tools/perf/tests/attr/{test-record-no-delay => test-record-no-buffering} (70%) diff --git a/tools/perf/tests/attr/test-record-no-delay b/tools/perf/tests/attr/test-record-no-buffering similarity index 70% rename from tools/perf/tests/attr/test-record-no-delay rename to tools/perf/tests/attr/test-record-no-buffering index f253b78cdbf2..0b0355af013a 100644 --- a/tools/perf/tests/attr/test-record-no-delay +++ b/tools/perf/tests/attr/test-record-no-buffering @@ -1,6 +1,6 @@ [config] command = record -args = -D kill >/dev/null 2>&1 +args = --no-buffering kill >/dev/null 2>&1 [event:base-record] sample_period=4000 From a72fe0afa18abd8d05d26944b32850775cb6c941 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:24 +0200 Subject: [PATCH 050/253] perf tests attr: Add proper return values The record command now properly returns the status of the tracee if there's any. We need to properly set the expected return value of the tracee in the attr tests. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/test-record-C0 | 1 + tools/perf/tests/attr/test-record-basic | 1 + tools/perf/tests/attr/test-record-branch-any | 1 + tools/perf/tests/attr/test-record-branch-filter-any | 1 + tools/perf/tests/attr/test-record-branch-filter-any_call | 1 + tools/perf/tests/attr/test-record-branch-filter-any_ret | 1 + tools/perf/tests/attr/test-record-branch-filter-hv | 1 + tools/perf/tests/attr/test-record-branch-filter-ind_call | 1 + tools/perf/tests/attr/test-record-branch-filter-k | 1 + tools/perf/tests/attr/test-record-branch-filter-u | 1 + tools/perf/tests/attr/test-record-count | 1 + tools/perf/tests/attr/test-record-data | 1 + tools/perf/tests/attr/test-record-freq | 1 + tools/perf/tests/attr/test-record-graph-default | 1 + tools/perf/tests/attr/test-record-graph-dwarf | 1 + tools/perf/tests/attr/test-record-graph-fp | 1 + tools/perf/tests/attr/test-record-group | 1 + tools/perf/tests/attr/test-record-group-sampling | 1 + tools/perf/tests/attr/test-record-group1 | 1 + tools/perf/tests/attr/test-record-no-buffering | 1 + tools/perf/tests/attr/test-record-no-inherit | 1 + tools/perf/tests/attr/test-record-no-samples | 1 + tools/perf/tests/attr/test-record-period | 1 + tools/perf/tests/attr/test-record-raw | 1 + 24 files changed, 24 insertions(+) diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/attr/test-record-C0 index d6a7e43f61b3..cb0a3138fa54 100644 --- a/tools/perf/tests/attr/test-record-C0 +++ b/tools/perf/tests/attr/test-record-C0 @@ -1,6 +1,7 @@ [config] command = record args = -C 0 kill >/dev/null 2>&1 +ret = 1 [event:base-record] cpu=0 diff --git a/tools/perf/tests/attr/test-record-basic b/tools/perf/tests/attr/test-record-basic index 55c0428370ca..85a23cf35ba1 100644 --- a/tools/perf/tests/attr/test-record-basic +++ b/tools/perf/tests/attr/test-record-basic @@ -1,5 +1,6 @@ [config] command = record args = kill >/dev/null 2>&1 +ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-any b/tools/perf/tests/attr/test-record-branch-any index 1421960ed4e9..a2fe031e6f87 100644 --- a/tools/perf/tests/attr/test-record-branch-any +++ b/tools/perf/tests/attr/test-record-branch-any @@ -1,6 +1,7 @@ [config] command = record args = -b kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 diff --git a/tools/perf/tests/attr/test-record-branch-filter-any b/tools/perf/tests/attr/test-record-branch-filter-any index 915c4df0e0c2..7df6a489c9ac 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any +++ b/tools/perf/tests/attr/test-record-branch-filter-any @@ -1,6 +1,7 @@ [config] command = record args = -j any kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_call b/tools/perf/tests/attr/test-record-branch-filter-any_call index 8708dbd4f373..6c93e07da61c 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_call +++ b/tools/perf/tests/attr/test-record-branch-filter-any_call @@ -1,6 +1,7 @@ [config] command = record args = -j any_call kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_ret b/tools/perf/tests/attr/test-record-branch-filter-any_ret index 0d3607a6dcbe..daf8b83e257f 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_ret +++ b/tools/perf/tests/attr/test-record-branch-filter-any_ret @@ -1,6 +1,7 @@ [config] command = record args = -j any_ret kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 diff --git a/tools/perf/tests/attr/test-record-branch-filter-hv b/tools/perf/tests/attr/test-record-branch-filter-hv index f25526740cec..fbf491da4f33 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-hv +++ b/tools/perf/tests/attr/test-record-branch-filter-hv @@ -1,6 +1,7 @@ [config] command = record args = -j hv kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 diff --git a/tools/perf/tests/attr/test-record-branch-filter-ind_call b/tools/perf/tests/attr/test-record-branch-filter-ind_call index e862dd179128..c63cc50d2269 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-ind_call +++ b/tools/perf/tests/attr/test-record-branch-filter-ind_call @@ -1,6 +1,7 @@ [config] command = record args = -j ind_call kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 diff --git a/tools/perf/tests/attr/test-record-branch-filter-k b/tools/perf/tests/attr/test-record-branch-filter-k index 182971e898f5..e0f2f09ff07b 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-k +++ b/tools/perf/tests/attr/test-record-branch-filter-k @@ -1,6 +1,7 @@ [config] command = record args = -j k kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 diff --git a/tools/perf/tests/attr/test-record-branch-filter-u b/tools/perf/tests/attr/test-record-branch-filter-u index 83449ef9e687..6cd36e01b6fc 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-u +++ b/tools/perf/tests/attr/test-record-branch-filter-u @@ -1,6 +1,7 @@ [config] command = record args = -j u kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 diff --git a/tools/perf/tests/attr/test-record-count b/tools/perf/tests/attr/test-record-count index 2f841de56f6b..34f6cc577263 100644 --- a/tools/perf/tests/attr/test-record-count +++ b/tools/perf/tests/attr/test-record-count @@ -1,6 +1,7 @@ [config] command = record args = -c 123 kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=123 diff --git a/tools/perf/tests/attr/test-record-data b/tools/perf/tests/attr/test-record-data index 716e143b5291..53c8890f6e5f 100644 --- a/tools/perf/tests/attr/test-record-data +++ b/tools/perf/tests/attr/test-record-data @@ -1,6 +1,7 @@ [config] command = record args = -d kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 diff --git a/tools/perf/tests/attr/test-record-freq b/tools/perf/tests/attr/test-record-freq index 600d0f8f2583..bf4cb459f0d5 100644 --- a/tools/perf/tests/attr/test-record-freq +++ b/tools/perf/tests/attr/test-record-freq @@ -1,6 +1,7 @@ [config] command = record args = -F 100 kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=100 diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/attr/test-record-graph-default index 853597a9a8f6..0b216e69760c 100644 --- a/tools/perf/tests/attr/test-record-graph-default +++ b/tools/perf/tests/attr/test-record-graph-default @@ -1,6 +1,7 @@ [config] command = record args = -g kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_type=295 diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/attr/test-record-graph-dwarf index 8321c602dc38..da2fa73bd0a2 100644 --- a/tools/perf/tests/attr/test-record-graph-dwarf +++ b/tools/perf/tests/attr/test-record-graph-dwarf @@ -1,6 +1,7 @@ [config] command = record args = --call-graph dwarf -- kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_type=45359 diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/attr/test-record-graph-fp index 055e3bee7993..625d190bb798 100644 --- a/tools/perf/tests/attr/test-record-graph-fp +++ b/tools/perf/tests/attr/test-record-graph-fp @@ -1,6 +1,7 @@ [config] command = record args = --call-graph fp kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_type=295 diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group index 57739cacdb2a..6e7961f6f7a5 100644 --- a/tools/perf/tests/attr/test-record-group +++ b/tools/perf/tests/attr/test-record-group @@ -1,6 +1,7 @@ [config] command = record args = --group -e cycles,instructions kill >/dev/null 2>&1 +ret = 1 [event-1:base-record] fd=1 diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index 658f5d60c873..ef59afd6d635 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -1,6 +1,7 @@ [config] command = record args = -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 +ret = 1 [event-1:base-record] fd=1 diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1 index c5548d054aff..87a222d014d8 100644 --- a/tools/perf/tests/attr/test-record-group1 +++ b/tools/perf/tests/attr/test-record-group1 @@ -1,6 +1,7 @@ [config] command = record args = -e '{cycles,instructions}' kill >/dev/null 2>&1 +ret = 1 [event-1:base-record] fd=1 diff --git a/tools/perf/tests/attr/test-record-no-buffering b/tools/perf/tests/attr/test-record-no-buffering index 0b0355af013a..5bf349aeeb69 100644 --- a/tools/perf/tests/attr/test-record-no-buffering +++ b/tools/perf/tests/attr/test-record-no-buffering @@ -1,6 +1,7 @@ [config] command = record args = --no-buffering kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 diff --git a/tools/perf/tests/attr/test-record-no-inherit b/tools/perf/tests/attr/test-record-no-inherit index 44edcb2edcd5..560943decb87 100644 --- a/tools/perf/tests/attr/test-record-no-inherit +++ b/tools/perf/tests/attr/test-record-no-inherit @@ -1,6 +1,7 @@ [config] command = record args = -i kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_type=263 diff --git a/tools/perf/tests/attr/test-record-no-samples b/tools/perf/tests/attr/test-record-no-samples index d0141b2418b5..8eb73ab639e0 100644 --- a/tools/perf/tests/attr/test-record-no-samples +++ b/tools/perf/tests/attr/test-record-no-samples @@ -1,6 +1,7 @@ [config] command = record args = -n kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=0 diff --git a/tools/perf/tests/attr/test-record-period b/tools/perf/tests/attr/test-record-period index 8abc5314fc52..69bc748f0f27 100644 --- a/tools/perf/tests/attr/test-record-period +++ b/tools/perf/tests/attr/test-record-period @@ -1,6 +1,7 @@ [config] command = record args = -c 100 -P kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=100 diff --git a/tools/perf/tests/attr/test-record-raw b/tools/perf/tests/attr/test-record-raw index 4a8ef25b5f49..3b89f99c240b 100644 --- a/tools/perf/tests/attr/test-record-raw +++ b/tools/perf/tests/attr/test-record-raw @@ -1,6 +1,7 @@ [config] command = record args = -R kill >/dev/null 2>&1 +ret = 1 [event:base-record] sample_period=4000 From 6f193400eaf1de53145cc13ff69f88336e9a9c9a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:25 +0200 Subject: [PATCH 051/253] perf tests attr: Fix cpu test disabled term setup The stat command creates all events disabled and enables them either manualy or via the enable_on_exec bit. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/test-stat-C0 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/attr/test-stat-C0 b/tools/perf/tests/attr/test-stat-C0 index aa835950751f..67717fe6a65d 100644 --- a/tools/perf/tests/attr/test-stat-C0 +++ b/tools/perf/tests/attr/test-stat-C0 @@ -4,6 +4,6 @@ args = -e cycles -C 0 kill >/dev/null 2>&1 ret = 1 [event:base-stat] -# events are enabled by default when attached to cpu -disabled=0 +# events are disabled by default when attached to cpu +disabled=1 enable_on_exec=0 From 5ff0cf421c37f53bb658a097c610cc1e3ced1133 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:26 +0200 Subject: [PATCH 052/253] perf tests attr: Fix sample_period setup The final period can differ from what user specifies on command line due to the perf_event_max_sample_rate sysctl setup. Thus we can't predixt the sample_period value any more. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/base-record | 2 +- tools/perf/tests/attr/test-record-branch-any | 1 - tools/perf/tests/attr/test-record-branch-filter-any | 1 - tools/perf/tests/attr/test-record-branch-filter-any_call | 1 - tools/perf/tests/attr/test-record-branch-filter-any_ret | 1 - tools/perf/tests/attr/test-record-branch-filter-hv | 1 - tools/perf/tests/attr/test-record-branch-filter-ind_call | 1 - tools/perf/tests/attr/test-record-branch-filter-k | 1 - tools/perf/tests/attr/test-record-branch-filter-u | 1 - tools/perf/tests/attr/test-record-data | 2 -- tools/perf/tests/attr/test-record-no-buffering | 1 - tools/perf/tests/attr/test-record-raw | 1 - 12 files changed, 1 insertion(+), 13 deletions(-) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index cdf3adf7d389..a9cb72b8fbb1 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -7,7 +7,7 @@ cpu=* type=0|1 size=112 config=0 -sample_period=4000 +sample_period=* sample_type=263 read_format=0 disabled=1 diff --git a/tools/perf/tests/attr/test-record-branch-any b/tools/perf/tests/attr/test-record-branch-any index a2fe031e6f87..81f839e2fad0 100644 --- a/tools/perf/tests/attr/test-record-branch-any +++ b/tools/perf/tests/attr/test-record-branch-any @@ -4,6 +4,5 @@ args = -b kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 sample_type=2311 branch_sample_type=8 diff --git a/tools/perf/tests/attr/test-record-branch-filter-any b/tools/perf/tests/attr/test-record-branch-filter-any index 7df6a489c9ac..357421f4dfce 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any +++ b/tools/perf/tests/attr/test-record-branch-filter-any @@ -4,6 +4,5 @@ args = -j any kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 sample_type=2311 branch_sample_type=8 diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_call b/tools/perf/tests/attr/test-record-branch-filter-any_call index 6c93e07da61c..dbc55f2ab845 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_call +++ b/tools/perf/tests/attr/test-record-branch-filter-any_call @@ -4,6 +4,5 @@ args = -j any_call kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 sample_type=2311 branch_sample_type=16 diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_ret b/tools/perf/tests/attr/test-record-branch-filter-any_ret index daf8b83e257f..a0824ff8e131 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_ret +++ b/tools/perf/tests/attr/test-record-branch-filter-any_ret @@ -4,6 +4,5 @@ args = -j any_ret kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 sample_type=2311 branch_sample_type=32 diff --git a/tools/perf/tests/attr/test-record-branch-filter-hv b/tools/perf/tests/attr/test-record-branch-filter-hv index fbf491da4f33..f34d6f120181 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-hv +++ b/tools/perf/tests/attr/test-record-branch-filter-hv @@ -4,6 +4,5 @@ args = -j hv kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 sample_type=2311 branch_sample_type=8 diff --git a/tools/perf/tests/attr/test-record-branch-filter-ind_call b/tools/perf/tests/attr/test-record-branch-filter-ind_call index c63cc50d2269..b86a35232248 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-ind_call +++ b/tools/perf/tests/attr/test-record-branch-filter-ind_call @@ -4,6 +4,5 @@ args = -j ind_call kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 sample_type=2311 branch_sample_type=64 diff --git a/tools/perf/tests/attr/test-record-branch-filter-k b/tools/perf/tests/attr/test-record-branch-filter-k index e0f2f09ff07b..d3fbc5e1858a 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-k +++ b/tools/perf/tests/attr/test-record-branch-filter-k @@ -4,6 +4,5 @@ args = -j k kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 sample_type=2311 branch_sample_type=8 diff --git a/tools/perf/tests/attr/test-record-branch-filter-u b/tools/perf/tests/attr/test-record-branch-filter-u index 6cd36e01b6fc..a318f0dda173 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-u +++ b/tools/perf/tests/attr/test-record-branch-filter-u @@ -4,6 +4,5 @@ args = -j u kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 sample_type=2311 branch_sample_type=8 diff --git a/tools/perf/tests/attr/test-record-data b/tools/perf/tests/attr/test-record-data index 53c8890f6e5f..a9cf2233b0ce 100644 --- a/tools/perf/tests/attr/test-record-data +++ b/tools/perf/tests/attr/test-record-data @@ -4,8 +4,6 @@ args = -d kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 - # sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | # PERF_SAMPLE_ADDR | PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC sample_type=33039 diff --git a/tools/perf/tests/attr/test-record-no-buffering b/tools/perf/tests/attr/test-record-no-buffering index 5bf349aeeb69..aa3956d8fe20 100644 --- a/tools/perf/tests/attr/test-record-no-buffering +++ b/tools/perf/tests/attr/test-record-no-buffering @@ -4,7 +4,6 @@ args = --no-buffering kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 sample_type=263 watermark=0 wakeup_events=1 diff --git a/tools/perf/tests/attr/test-record-raw b/tools/perf/tests/attr/test-record-raw index 3b89f99c240b..a188a614a44c 100644 --- a/tools/perf/tests/attr/test-record-raw +++ b/tools/perf/tests/attr/test-record-raw @@ -4,5 +4,4 @@ args = -R kill >/dev/null 2>&1 ret = 1 [event:base-record] -sample_period=4000 sample_type=1415 From 042049404f7a149446d5c839a088fb928b11ce7a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:27 +0200 Subject: [PATCH 053/253] perf tests attr: Fix precise_ip setup We have a test to detect to highest precise possible, so test can't just predict precise_ip value. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-13-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/base-record | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index a9cb72b8fbb1..31e0b1da830b 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -25,7 +25,7 @@ inherit_stat=0 enable_on_exec=1 task=0 watermark=0 -precise_ip=0 +precise_ip=0|1|2|3 mmap_data=0 sample_id_all=1 exclude_host=0|1 From b78e92e6071d5131f5ba6b25437d82c0fcef09de Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:28 +0200 Subject: [PATCH 054/253] perf tests attr: Fix stat sample_type setup >From following commit: commit 4979d0c7d0c7 ("perf stat record: Add record command") we started to assign PERF_SAMPLE_IDENTIFIER to sample_type. Fixing the attr stat tests accordingly. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-14-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/base-stat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index ff9b33d4ae9e..4d0c2e42b64e 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -8,7 +8,7 @@ type=0 size=112 config=0 sample_period=0 -sample_type=0 +sample_type=65536 read_format=3 disabled=1 inherit=1 From 1f41873c22826fc92d4bb03c58cf44664fdc3bb8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Jul 2017 16:50:29 +0200 Subject: [PATCH 055/253] perf tests attr: Add optional term Some of the stat events are quite rare to find on common machines (like front end cycles). Adding an 'optional' term to mark such events in attr tests. Event marked as optional will not fail the test case if it's not found in results. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20170703145030.12903-15-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.py | 14 +++++++++++--- tools/perf/tests/attr/test-stat-default | 2 ++ tools/perf/tests/attr/test-stat-detailed-1 | 2 ++ tools/perf/tests/attr/test-stat-detailed-2 | 3 +++ tools/perf/tests/attr/test-stat-detailed-3 | 5 +++++ 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index b03261c6b1ed..6bb50e82a3e3 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -105,6 +105,11 @@ class Event(dict): return False return True + def optional(self): + if self.has_key('optional') and self['optional'] == '1': + return True + return False + def diff(self, other): for t in Event.terms: if not self.has_key(t) or not other.has_key(t): @@ -244,9 +249,12 @@ class Test(object): log.debug(" match: [%s] matches %s" % (exp_name, str(exp_list))) # we did not any matching event - fail - if (not exp_list): - exp_event.diff(res_event) - raise Fail(self, 'match failure'); + if not exp_list: + if exp_event.optional(): + log.debug(" %s does not match, but is optional" % exp_name) + else: + exp_event.diff(res_event) + raise Fail(self, 'match failure'); match[exp_name] = exp_list diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/attr/test-stat-default index 19270f54c96e..e911dbd4eb47 100644 --- a/tools/perf/tests/attr/test-stat-default +++ b/tools/perf/tests/attr/test-stat-default @@ -38,12 +38,14 @@ config=0 fd=6 type=0 config=7 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND [event7:base-stat] fd=7 type=0 config=8 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS [event8:base-stat] diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/attr/test-stat-detailed-1 index 51426b87153b..b39270a08e74 100644 --- a/tools/perf/tests/attr/test-stat-detailed-1 +++ b/tools/perf/tests/attr/test-stat-detailed-1 @@ -39,12 +39,14 @@ config=0 fd=6 type=0 config=7 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND [event7:base-stat] fd=7 type=0 config=8 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS [event8:base-stat] diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/attr/test-stat-detailed-2 index 8de5acc31c27..45f8e6ea34f8 100644 --- a/tools/perf/tests/attr/test-stat-detailed-2 +++ b/tools/perf/tests/attr/test-stat-detailed-2 @@ -39,12 +39,14 @@ config=0 fd=6 type=0 config=7 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND [event7:base-stat] fd=7 type=0 config=8 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS [event8:base-stat] @@ -108,6 +110,7 @@ config=65538 fd=15 type=3 config=1 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_L1I << 0 | diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/attr/test-stat-detailed-3 index 0a1f45bf7d79..30ae0fb7a3fd 100644 --- a/tools/perf/tests/attr/test-stat-detailed-3 +++ b/tools/perf/tests/attr/test-stat-detailed-3 @@ -39,12 +39,14 @@ config=0 fd=6 type=0 config=7 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND [event7:base-stat] fd=7 type=0 config=8 +optional=1 # PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS [event8:base-stat] @@ -108,6 +110,7 @@ config=65538 fd=15 type=3 config=1 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_L1I << 0 | @@ -162,6 +165,7 @@ config=65540 fd=21 type=3 config=512 +optional=1 # PERF_TYPE_HW_CACHE, # PERF_COUNT_HW_CACHE_L1D << 0 | @@ -171,3 +175,4 @@ config=512 fd=22 type=3 config=66048 +optional=1 From 0ae79636e32889cced620865debd3ba5bfd36daa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Jul 2017 10:31:42 -0300 Subject: [PATCH 056/253] perf trace beauty: Export strarray for use in per-object beautifiers Like will be done with fcntl(fd, F_GETLEASE, F_RDLCK|F_WRLCK|F_UNLCK) Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-3p11bgirtntjfmbixfkz8i2m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 27 +++++++-------------------- tools/perf/trace/beauty/beauty.h | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 65fa0126e939..0dedce0ca0bb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -283,34 +283,21 @@ static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void ({ struct syscall_tp *fields = evsel->priv; \ fields->name.pointer(&fields->name, sample); }) -struct strarray { - int offset; - int nr_entries; - const char **entries; -}; +size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val) +{ + int idx = val - sa->offset; -#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ - .nr_entries = ARRAY_SIZE(array), \ - .entries = array, \ -} + if (idx < 0 || idx >= sa->nr_entries) + return scnprintf(bf, size, intfmt, val); -#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ - .offset = off, \ - .nr_entries = ARRAY_SIZE(array), \ - .entries = array, \ + return scnprintf(bf, size, "%s", sa->entries[idx]); } static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, const char *intfmt, struct syscall_arg *arg) { - struct strarray *sa = arg->parm; - int idx = arg->val - sa->offset; - - if (idx < 0 || idx >= sa->nr_entries) - return scnprintf(bf, size, intfmt, arg->val); - - return scnprintf(bf, size, "%s", sa->entries[idx]); + return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val); } static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 9ccf0f323fe5..f75ef7d0b303 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -1,8 +1,28 @@ #ifndef _PERF_TRACE_BEAUTY_H #define _PERF_TRACE_BEAUTY_H +#include #include +struct strarray { + int offset; + int nr_entries; + const char **entries; +}; + +#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \ + .nr_entries = ARRAY_SIZE(array), \ + .entries = array, \ +} + +#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \ + .offset = off, \ + .nr_entries = ARRAY_SIZE(array), \ + .entries = array, \ +} + +size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val); + struct trace; struct thread; From 65dfa1e7799edd114fbd3949f2783f4e69fd0305 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Jul 2017 10:46:13 -0300 Subject: [PATCH 057/253] perf trace beauty fcntl: Beautify F_GETLEASE and F_SETLEASE arg/return One more looking prettier. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ytr7idkese8sjtvn5g60130e@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/fcntl.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index d082fd2e594f..b6987c468d18 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -20,6 +20,19 @@ static size_t syscall_arg__scnprintf_fcntl_getfd(char *bf, size_t size, struct s return fcntl__scnprintf_getfd(arg->val, bf, size); } +static size_t fcntl__scnprintf_getlease(unsigned long val, char *bf, size_t size) +{ + static const char *fcntl_setlease[] = { "RDLCK", "WRLCK", "UNLCK", }; + static DEFINE_STRARRAY(fcntl_setlease); + + return strarray__scnprintf(&strarray__fcntl_setlease, bf, size, "%x", val); +} + +static size_t syscall_arg__scnprintf_fcntl_getlease(char *bf, size_t size, struct syscall_arg *arg) +{ + return fcntl__scnprintf_getlease(arg->val, bf, size); +} + size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg) { if (arg->val == F_GETFL) { @@ -38,11 +51,15 @@ size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_ar syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_pid); goto mask_arg; } + if (arg->val == F_GETLEASE) { + syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fcntl_getlease); + goto mask_arg; + } /* * Some commands ignore the third fcntl argument, "arg", so mask it */ if (arg->val == F_GET_SEALS || - arg->val == F_GETLEASE || arg->val == F_GETSIG) { + arg->val == F_GETSIG) { mask_arg: arg->mask |= (1 << 2); } @@ -62,6 +79,9 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar if (cmd == F_SETOWN) return syscall_arg__scnprintf_pid(bf, size, arg); + + if (cmd == F_SETLEASE) + return fcntl__scnprintf_getlease(arg->val, bf, size); /* * We still don't grab the contents of pointers on entry or exit, * so just print them as hex numbers From 82d4a1109fc302795a184a328f60ad28bf7b5989 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Jul 2017 15:22:47 -0300 Subject: [PATCH 058/253] perf trace: Group per syscall arg formatter info into one struct Instead of having syscall_fmt.{arg_scnprintf,arg_parm}, introduce struct syscall_arg_fmt and have these two, paving the way for more state to change the formatting algorithms. For instance, in the 'fcntl' 'cmd' case it is better not to suppress it when being zero, showing instead its name "DUPFD". We had that in an ad-hoc way just for strarrays, but with more involved cases like fcntl, that can't be done with just a strarray, we'll need a ".show_zero" arg in the 'cmd' syscall_arg_fmt. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ch06o2j72zbjx5xww4qp67au@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 256 +++++++++++++++++++------------------ 1 file changed, 133 insertions(+), 123 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0dedce0ca0bb..32778a621a08 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -582,9 +582,9 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags -#define STRARRAY(arg, name, array) \ - .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ - .arg_parm = { [arg] = &strarray__##array, } +#define STRARRAY(name, array) \ + { .scnprintf = SCA_STRARRAY, \ + .parm = &strarray__##array, } #include "trace/beauty/eventfd.c" #include "trace/beauty/flock.c" @@ -601,54 +601,61 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, #include "trace/beauty/socket_type.c" #include "trace/beauty/waitid_options.c" +struct syscall_arg_fmt { + size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); + void *parm; +}; + static struct syscall_fmt { const char *name; const char *alias; - size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg); - void *arg_parm[6]; + struct syscall_arg_fmt arg[6]; bool errmsg; bool errpid; bool timeout; bool hexret; } syscall_fmts[] = { { .name = "access", .errmsg = true, - .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, + .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, - { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), }, + { .name = "bpf", .errmsg = true, + .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, { .name = "brk", .hexret = true, - .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, + .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, }, { .name = "chdir", .errmsg = true, }, { .name = "chmod", .errmsg = true, }, { .name = "chroot", .errmsg = true, }, - { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, + { .name = "clock_gettime", .errmsg = true, + .arg = { [0] = STRARRAY(clk_id, clockid), }, }, { .name = "clone", .errpid = true, }, { .name = "close", .errmsg = true, - .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, + .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, }, { .name = "connect", .errmsg = true, }, { .name = "creat", .errmsg = true, }, { .name = "dup", .errmsg = true, }, { .name = "dup2", .errmsg = true, }, { .name = "dup3", .errmsg = true, }, - { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, + { .name = "epoll_ctl", .errmsg = true, + .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, }, { .name = "eventfd2", .errmsg = true, - .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, + .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, }, { .name = "faccessat", .errmsg = true, }, { .name = "fadvise64", .errmsg = true, }, { .name = "fallocate", .errmsg = true, }, { .name = "fchdir", .errmsg = true, }, { .name = "fchmod", .errmsg = true, }, { .name = "fchmodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "fchown", .errmsg = true, }, { .name = "fchownat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "fcntl", .errmsg = true, - .arg_scnprintf = { [1] = SCA_FCNTL_CMD, /* cmd */ - [2] = SCA_FCNTL_ARG, /* arg */ }, - .arg_parm = { [1] = &strarrays__fcntl_cmds_arrays, /* cmd */ }, }, + .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */ + .parm = &strarrays__fcntl_cmds_arrays, /* cmd */ }, + [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, { .name = "fdatasync", .errmsg = true, }, { .name = "flock", .errmsg = true, - .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, }, + .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, }, { .name = "fsetxattr", .errmsg = true, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, @@ -656,186 +663,190 @@ static struct syscall_fmt { { .name = "fsync", .errmsg = true, }, { .name = "ftruncate", .errmsg = true, }, { .name = "futex", .errmsg = true, - .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, + .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, }, { .name = "futimesat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "getdents", .errmsg = true, }, { .name = "getdents64", .errmsg = true, }, - { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, + { .name = "getitimer", .errmsg = true, + .arg = { [0] = STRARRAY(which, itimers), }, }, { .name = "getpid", .errpid = true, }, { .name = "getpgid", .errpid = true, }, { .name = "getppid", .errpid = true, }, { .name = "getrandom", .errmsg = true, - .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, }, - { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, + .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, + { .name = "getrlimit", .errmsg = true, + .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, { .name = "getxattr", .errmsg = true, }, - { .name = "inotify_add_watch", .errmsg = true, }, + { .name = "inotify_add_watch", .errmsg = true, }, { .name = "ioctl", .errmsg = true, - .arg_scnprintf = { + .arg = { #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches. */ - [1] = SCA_STRHEXARRAY, /* cmd */ - [2] = SCA_HEX, /* arg */ }, - .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, }, + [1] = { .scnprintf = SCA_STRHEXARRAY, /* cmd */ + .parm = &strarray__tioctls, }, + [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, #else - [2] = SCA_HEX, /* arg */ }, }, + [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, #endif - { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), }, + { .name = "keyctl", .errmsg = true, + .arg = { [0] = STRARRAY(option, keyctl_options), }, }, { .name = "kill", .errmsg = true, - .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, + .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "lchown", .errmsg = true, }, { .name = "lgetxattr", .errmsg = true, }, { .name = "linkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "listxattr", .errmsg = true, }, { .name = "llistxattr", .errmsg = true, }, { .name = "lremovexattr", .errmsg = true, }, { .name = "lseek", .errmsg = true, - .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ }, - .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, + .arg = { [2] = STRARRAY(whence, whences), }, }, { .name = "lsetxattr", .errmsg = true, }, { .name = "lstat", .errmsg = true, .alias = "newlstat", }, { .name = "lsxattr", .errmsg = true, }, { .name = "madvise", .errmsg = true, - .arg_scnprintf = { [0] = SCA_HEX, /* start */ - [2] = SCA_MADV_BHV, /* behavior */ }, }, + .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, + [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, }, { .name = "mkdir", .errmsg = true, }, { .name = "mkdirat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "mknod", .errmsg = true, }, { .name = "mknodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "mlock", .errmsg = true, - .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, + .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, { .name = "mlockall", .errmsg = true, - .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, + .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, { .name = "mmap", .hexret = true, /* The standard mmap maps to old_mmap on s390x */ #if defined(__s390x__) .alias = "old_mmap", #endif - .arg_scnprintf = { [0] = SCA_HEX, /* addr */ - [2] = SCA_MMAP_PROT, /* prot */ - [3] = SCA_MMAP_FLAGS, /* flags */ }, }, + .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, + [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, + [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, }, { .name = "mprotect", .errmsg = true, - .arg_scnprintf = { [0] = SCA_HEX, /* start */ - [2] = SCA_MMAP_PROT, /* prot */ }, }, + .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, + [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, { .name = "mq_unlink", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, }, + .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, }, { .name = "mremap", .hexret = true, - .arg_scnprintf = { [0] = SCA_HEX, /* addr */ - [3] = SCA_MREMAP_FLAGS, /* flags */ - [4] = SCA_HEX, /* new_addr */ }, }, + .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, + [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, + [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, }, { .name = "munlock", .errmsg = true, - .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, + .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, { .name = "munmap", .errmsg = true, - .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, + .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, { .name = "name_to_handle_at", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, { .name = "newfstatat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, { .name = "open", .errmsg = true, - .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, { .name = "open_by_handle_at", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [2] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, + [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, { .name = "openat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [2] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, + [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, { .name = "perf_event_open", .errmsg = true, - .arg_scnprintf = { [2] = SCA_INT, /* cpu */ - [3] = SCA_FD, /* group_fd */ - [4] = SCA_PERF_FLAGS, /* flags */ }, }, + .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ }, + [3] = { .scnprintf = SCA_FD, /* group_fd */ }, + [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, { .name = "pipe2", .errmsg = true, - .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, + .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, }, { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, { .name = "pread", .errmsg = true, .alias = "pread64", }, { .name = "preadv", .errmsg = true, .alias = "pread", }, - { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, + { .name = "prlimit64", .errmsg = true, + .arg = { [1] = STRARRAY(resource, rlimit_resources), }, }, { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, { .name = "pwritev", .errmsg = true, }, { .name = "read", .errmsg = true, }, { .name = "readlink", .errmsg = true, }, { .name = "readlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, { .name = "readv", .errmsg = true, }, { .name = "recvfrom", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, { .name = "recvmmsg", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, { .name = "recvmsg", .errmsg = true, - .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, { .name = "removexattr", .errmsg = true, }, { .name = "renameat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, { .name = "rmdir", .errmsg = true, }, { .name = "rt_sigaction", .errmsg = true, - .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, - { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, + .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, + { .name = "rt_sigprocmask", .errmsg = true, + .arg = { [0] = STRARRAY(how, sighow), }, }, { .name = "rt_sigqueueinfo", .errmsg = true, - .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, + .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "rt_tgsigqueueinfo", .errmsg = true, - .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, + .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "sched_getattr", .errmsg = true, }, { .name = "sched_setattr", .errmsg = true, }, { .name = "sched_setscheduler", .errmsg = true, - .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, }, + .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, }, { .name = "seccomp", .errmsg = true, - .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */ - [1] = SCA_SECCOMP_FLAGS, /* flags */ }, }, + .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ }, + [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, }, { .name = "select", .errmsg = true, .timeout = true, }, { .name = "sendmmsg", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, { .name = "sendmsg", .errmsg = true, - .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, { .name = "sendto", .errmsg = true, - .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, { .name = "set_tid_address", .errpid = true, }, - { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, + { .name = "setitimer", .errmsg = true, + .arg = { [0] = STRARRAY(which, itimers), }, }, { .name = "setpgid", .errmsg = true, }, - { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, + { .name = "setrlimit", .errmsg = true, + .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, { .name = "setxattr", .errmsg = true, }, { .name = "shutdown", .errmsg = true, }, { .name = "socket", .errmsg = true, - .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ - [1] = SCA_SK_TYPE, /* type */ }, - .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, + .arg = { [0] = STRARRAY(family, socket_families), + [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, }, { .name = "socketpair", .errmsg = true, - .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ - [1] = SCA_SK_TYPE, /* type */ }, - .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, + .arg = { [0] = STRARRAY(family, socket_families), + [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "statfs", .errmsg = true, }, { .name = "statx", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* flags */ - [2] = SCA_STATX_FLAGS, /* flags */ - [3] = SCA_STATX_MASK, /* mask */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ }, + [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } , + [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, }, { .name = "swapoff", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, + .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, { .name = "swapon", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, + .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, { .name = "symlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, { .name = "tgkill", .errmsg = true, - .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, + .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "tkill", .errmsg = true, - .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, + .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "truncate", .errmsg = true, }, { .name = "uname", .errmsg = true, .alias = "newuname", }, { .name = "unlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, { .name = "utime", .errmsg = true, }, { .name = "utimensat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, + .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, }, { .name = "utimes", .errmsg = true, }, { .name = "vmsplice", .errmsg = true, }, { .name = "wait4", .errpid = true, - .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, }, + .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, { .name = "waitid", .errpid = true, - .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, }, + .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, { .name = "write", .errmsg = true, }, { .name = "writev", .errmsg = true, }, }; @@ -859,8 +870,7 @@ struct syscall { const char *name; bool is_exit; struct syscall_fmt *fmt; - size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); - void **arg_parm; + struct syscall_arg_fmt *arg_fmt; }; /* @@ -1209,27 +1219,29 @@ static int syscall__set_arg_fmts(struct syscall *sc) struct format_field *field; int idx = 0, len; - sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); - if (sc->arg_scnprintf == NULL) + sc->arg_fmt = calloc(sc->nr_args, sizeof(*sc->arg_fmt)); + if (sc->arg_fmt == NULL) return -1; - if (sc->fmt) - sc->arg_parm = sc->fmt->arg_parm; + for (field = sc->args; field; field = field->next, ++idx) { + if (sc->fmt) { + sc->arg_fmt[idx] = sc->fmt->arg[idx]; - for (field = sc->args; field; field = field->next) { - if (sc->fmt && sc->fmt->arg_scnprintf[idx]) - sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; - else if (strcmp(field->type, "const char *") == 0 && + if (sc->fmt->arg[idx].scnprintf) + continue; + } + + if (strcmp(field->type, "const char *") == 0 && (strcmp(field->name, "filename") == 0 || strcmp(field->name, "path") == 0 || strcmp(field->name, "pathname") == 0)) - sc->arg_scnprintf[idx] = SCA_FILENAME; + sc->arg_fmt[idx].scnprintf = SCA_FILENAME; else if (field->flags & FIELD_IS_POINTER) - sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; + sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex; else if (strcmp(field->type, "pid_t") == 0) - sc->arg_scnprintf[idx] = SCA_PID; + sc->arg_fmt[idx].scnprintf = SCA_PID; else if (strcmp(field->type, "umode_t") == 0) - sc->arg_scnprintf[idx] = SCA_MODE_T; + sc->arg_fmt[idx].scnprintf = SCA_MODE_T; else if ((strcmp(field->type, "int") == 0 || strcmp(field->type, "unsigned int") == 0 || strcmp(field->type, "long") == 0) && @@ -1242,9 +1254,8 @@ static int syscall__set_arg_fmts(struct syscall *sc) * 23 unsigned int * 7 unsigned long */ - sc->arg_scnprintf[idx] = SCA_FD; + sc->arg_fmt[idx].scnprintf = SCA_FD; } - ++idx; } return 0; @@ -1416,20 +1427,19 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, * strarray for it. */ if (val == 0 && - !(sc->arg_scnprintf && - (sc->arg_scnprintf[arg.idx] == SCA_STRARRAY || - sc->arg_scnprintf[arg.idx] == SCA_STRARRAYS) && - sc->arg_parm[arg.idx])) + !(sc->arg_fmt && + (sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY || + sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) && + sc->arg_fmt[arg.idx].parm)) continue; printed += scnprintf(bf + printed, size - printed, "%s%s: ", printed ? ", " : "", field->name); - if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { + if (sc->arg_fmt && sc->arg_fmt[arg.idx].scnprintf) { arg.val = val; - if (sc->arg_parm) - arg.parm = sc->arg_parm[arg.idx]; - printed += sc->arg_scnprintf[arg.idx](bf + printed, - size - printed, &arg); + if (sc->arg_fmt[arg.idx].parm) + arg.parm = sc->arg_fmt[arg.idx].parm; + printed += sc->arg_fmt[arg.idx].scnprintf(bf + printed, size - printed, &arg); } else { printed += scnprintf(bf + printed, size - printed, "%ld", val); From d47737d524174a81b80c487fe07de3ee2458ee32 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Jul 2017 15:59:03 -0300 Subject: [PATCH 059/253] perf trace: Allow syscall arg formatters to request non suppression of zeros The 'perf trace' tool is suppressing args set to zero, with the exception of string tables (strarrays), which are kinda like enums, i.e. we have maps to go from numbers to strings. But the 'cmd' fcntl arg requires more specialized treatment, as its value will regulate if the next fcntl syscall arg, 'arg', should be ignored (not used) and also how to format the syscall return (fd, file flags, etc), so add a 'show_zero" bool to struct syscall_arg_fmt, to regulate this more explicitely. Will be used in a following patch with fcntl, here is just the mechanism. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-all738jctxets8ffyizp5lzo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 32778a621a08..b842bd93457f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -604,6 +604,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, struct syscall_arg_fmt { size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); void *parm; + bool show_zero; }; static struct syscall_fmt { @@ -1428,7 +1429,8 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, */ if (val == 0 && !(sc->arg_fmt && - (sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY || + (sc->arg_fmt[arg.idx].show_zero || + sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY || sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) && sc->arg_fmt[arg.idx].parm)) continue; From 39cc355b0486d89cef335e35edde26c14b462126 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Jul 2017 16:02:52 -0300 Subject: [PATCH 060/253] perf trace beauty fcntl: Do not suppress 'cmd' when zero, should be DUPFD Before: 77059.513 ( 0.005 ms): bash/6649 fcntl(fd: 1, arg: 10) = 10 After: 77059.513 ( 0.005 ms): bash/6649 fcntl(fd: 1, cmd: DUPFD, arg: 10) = 10 Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-woois88uwcr4xu38xx1ihiwo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b842bd93457f..bab87f612a36 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -652,7 +652,8 @@ static struct syscall_fmt { .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "fcntl", .errmsg = true, .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */ - .parm = &strarrays__fcntl_cmds_arrays, /* cmd */ }, + .parm = &strarrays__fcntl_cmds_arrays, + .show_zero = true, }, [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, { .name = "fdatasync", .errmsg = true, }, { .name = "flock", .errmsg = true, From befecc810c95994774439baeedc8a03fe8d51e8f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Jul 2017 16:04:20 -0300 Subject: [PATCH 061/253] perf trace beauty fcntl: Beautify the 'arg' for DUPFD Before: 77059.513 ( 0.005 ms): bash/6649 fcntl(fd: 1, cmd: DUPFD, arg: 10) = 10 After: 77059.513 ( 0.005 ms): bash/6649 fcntl(fd: 1, cmd: DUPFD, arg: 10) = 10 Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-0k8iszng0slcuw0rc6xq1x5l@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/fcntl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index b6987c468d18..9e8900c13cb1 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -71,6 +71,9 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar { int cmd = syscall_arg__val(arg, 1); + if (cmd == F_DUPFD) + return syscall_arg__scnprintf_fd(bf, size, arg); + if (cmd == F_SETFD) return fcntl__scnprintf_getfd(arg->val, bf, size); From 1f63139c3f8af1d6a09de5dd355c8b5695407c79 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Jul 2017 12:45:57 -0300 Subject: [PATCH 062/253] perf trace beauty: Simplify syscall return formatting Removing syscall_fmt::err_msg and instead always formatting negative returns as errno values. With this we can remove a lot of entries that have no special handling besides the ones we can do by looking at the tracefs format files, i.e. the types for the fields (e.g. pid_t), well known names (e.g. fd). Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-rg9u7a3qqdnzo37d212vnz2o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 209 ++++++++++++++----------------------- 1 file changed, 81 insertions(+), 128 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bab87f612a36..1e4c0657b712 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -611,77 +611,53 @@ static struct syscall_fmt { const char *name; const char *alias; struct syscall_arg_fmt arg[6]; - bool errmsg; bool errpid; bool timeout; bool hexret; } syscall_fmts[] = { - { .name = "access", .errmsg = true, + { .name = "access", .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, - { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, - { .name = "bpf", .errmsg = true, + { .name = "arch_prctl", .alias = "prctl", }, + { .name = "bpf", .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, { .name = "brk", .hexret = true, .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, }, - { .name = "chdir", .errmsg = true, }, - { .name = "chmod", .errmsg = true, }, - { .name = "chroot", .errmsg = true, }, - { .name = "clock_gettime", .errmsg = true, + { .name = "clock_gettime", .arg = { [0] = STRARRAY(clk_id, clockid), }, }, { .name = "clone", .errpid = true, }, - { .name = "close", .errmsg = true, + { .name = "close", .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, }, - { .name = "connect", .errmsg = true, }, - { .name = "creat", .errmsg = true, }, - { .name = "dup", .errmsg = true, }, - { .name = "dup2", .errmsg = true, }, - { .name = "dup3", .errmsg = true, }, - { .name = "epoll_ctl", .errmsg = true, + { .name = "epoll_ctl", .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, }, - { .name = "eventfd2", .errmsg = true, + { .name = "eventfd2", .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, }, - { .name = "faccessat", .errmsg = true, }, - { .name = "fadvise64", .errmsg = true, }, - { .name = "fallocate", .errmsg = true, }, - { .name = "fchdir", .errmsg = true, }, - { .name = "fchmod", .errmsg = true, }, - { .name = "fchmodat", .errmsg = true, + { .name = "fchmodat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, - { .name = "fchown", .errmsg = true, }, - { .name = "fchownat", .errmsg = true, + { .name = "fchownat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, - { .name = "fcntl", .errmsg = true, + { .name = "fcntl", .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */ .parm = &strarrays__fcntl_cmds_arrays, .show_zero = true, }, [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, - { .name = "fdatasync", .errmsg = true, }, - { .name = "flock", .errmsg = true, + { .name = "flock", .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, }, - { .name = "fsetxattr", .errmsg = true, }, - { .name = "fstat", .errmsg = true, .alias = "newfstat", }, - { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, - { .name = "fstatfs", .errmsg = true, }, - { .name = "fsync", .errmsg = true, }, - { .name = "ftruncate", .errmsg = true, }, - { .name = "futex", .errmsg = true, + { .name = "fstat", .alias = "newfstat", }, + { .name = "fstatat", .alias = "newfstatat", }, + { .name = "futex", .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, }, - { .name = "futimesat", .errmsg = true, + { .name = "futimesat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, - { .name = "getdents", .errmsg = true, }, - { .name = "getdents64", .errmsg = true, }, - { .name = "getitimer", .errmsg = true, + { .name = "getitimer", .arg = { [0] = STRARRAY(which, itimers), }, }, { .name = "getpid", .errpid = true, }, { .name = "getpgid", .errpid = true, }, { .name = "getppid", .errpid = true, }, - { .name = "getrandom", .errmsg = true, + { .name = "getrandom", .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, - { .name = "getrlimit", .errmsg = true, + { .name = "getrlimit", .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, - { .name = "getxattr", .errmsg = true, }, - { .name = "inotify_add_watch", .errmsg = true, }, - { .name = "ioctl", .errmsg = true, + { .name = "ioctl", .arg = { #if defined(__i386__) || defined(__x86_64__) /* @@ -693,34 +669,25 @@ static struct syscall_fmt { #else [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, #endif - { .name = "keyctl", .errmsg = true, + { .name = "keyctl", .arg = { [0] = STRARRAY(option, keyctl_options), }, }, - { .name = "kill", .errmsg = true, + { .name = "kill", .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, - { .name = "lchown", .errmsg = true, }, - { .name = "lgetxattr", .errmsg = true, }, - { .name = "linkat", .errmsg = true, + { .name = "linkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, - { .name = "listxattr", .errmsg = true, }, - { .name = "llistxattr", .errmsg = true, }, - { .name = "lremovexattr", .errmsg = true, }, - { .name = "lseek", .errmsg = true, + { .name = "lseek", .arg = { [2] = STRARRAY(whence, whences), }, }, - { .name = "lsetxattr", .errmsg = true, }, - { .name = "lstat", .errmsg = true, .alias = "newlstat", }, - { .name = "lsxattr", .errmsg = true, }, - { .name = "madvise", .errmsg = true, + { .name = "lstat", .alias = "newlstat", }, + { .name = "madvise", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, }, - { .name = "mkdir", .errmsg = true, }, - { .name = "mkdirat", .errmsg = true, + { .name = "mkdirat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, - { .name = "mknod", .errmsg = true, }, - { .name = "mknodat", .errmsg = true, + { .name = "mknodat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, - { .name = "mlock", .errmsg = true, + { .name = "mlock", .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, - { .name = "mlockall", .errmsg = true, + { .name = "mlockall", .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, { .name = "mmap", .hexret = true, /* The standard mmap maps to old_mmap on s390x */ @@ -730,127 +697,109 @@ static struct syscall_fmt { .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, }, - { .name = "mprotect", .errmsg = true, + { .name = "mprotect", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, - { .name = "mq_unlink", .errmsg = true, + { .name = "mq_unlink", .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, }, { .name = "mremap", .hexret = true, .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, }, - { .name = "munlock", .errmsg = true, + { .name = "munlock", .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, - { .name = "munmap", .errmsg = true, + { .name = "munmap", .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, }, - { .name = "name_to_handle_at", .errmsg = true, + { .name = "name_to_handle_at", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, - { .name = "newfstatat", .errmsg = true, + { .name = "newfstatat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, - { .name = "open", .errmsg = true, + { .name = "open", .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, - { .name = "open_by_handle_at", .errmsg = true, + { .name = "open_by_handle_at", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, - { .name = "openat", .errmsg = true, + { .name = "openat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, - { .name = "perf_event_open", .errmsg = true, + { .name = "perf_event_open", .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ }, [3] = { .scnprintf = SCA_FD, /* group_fd */ }, [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, - { .name = "pipe2", .errmsg = true, + { .name = "pipe2", .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, }, - { .name = "poll", .errmsg = true, .timeout = true, }, - { .name = "ppoll", .errmsg = true, .timeout = true, }, - { .name = "pread", .errmsg = true, .alias = "pread64", }, - { .name = "preadv", .errmsg = true, .alias = "pread", }, - { .name = "prlimit64", .errmsg = true, + { .name = "poll", .timeout = true, }, + { .name = "ppoll", .timeout = true, }, + { .name = "pread", .alias = "pread64", }, + { .name = "preadv", .alias = "pread", }, + { .name = "prlimit64", .arg = { [1] = STRARRAY(resource, rlimit_resources), }, }, - { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, - { .name = "pwritev", .errmsg = true, }, - { .name = "read", .errmsg = true, }, - { .name = "readlink", .errmsg = true, }, - { .name = "readlinkat", .errmsg = true, + { .name = "pwrite", .alias = "pwrite64", }, + { .name = "readlinkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, - { .name = "readv", .errmsg = true, }, - { .name = "recvfrom", .errmsg = true, + { .name = "recvfrom", .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, - { .name = "recvmmsg", .errmsg = true, + { .name = "recvmmsg", .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, - { .name = "recvmsg", .errmsg = true, + { .name = "recvmsg", .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, - { .name = "removexattr", .errmsg = true, }, - { .name = "renameat", .errmsg = true, + { .name = "renameat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, - { .name = "rmdir", .errmsg = true, }, - { .name = "rt_sigaction", .errmsg = true, + { .name = "rt_sigaction", .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, - { .name = "rt_sigprocmask", .errmsg = true, + { .name = "rt_sigprocmask", .arg = { [0] = STRARRAY(how, sighow), }, }, - { .name = "rt_sigqueueinfo", .errmsg = true, + { .name = "rt_sigqueueinfo", .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, - { .name = "rt_tgsigqueueinfo", .errmsg = true, + { .name = "rt_tgsigqueueinfo", .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, - { .name = "sched_getattr", .errmsg = true, }, - { .name = "sched_setattr", .errmsg = true, }, - { .name = "sched_setscheduler", .errmsg = true, + { .name = "sched_setscheduler", .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, }, - { .name = "seccomp", .errmsg = true, + { .name = "seccomp", .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ }, [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, }, - { .name = "select", .errmsg = true, .timeout = true, }, - { .name = "sendmmsg", .errmsg = true, + { .name = "select", .timeout = true, }, + { .name = "sendmmsg", .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, - { .name = "sendmsg", .errmsg = true, + { .name = "sendmsg", .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, - { .name = "sendto", .errmsg = true, + { .name = "sendto", .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, { .name = "set_tid_address", .errpid = true, }, - { .name = "setitimer", .errmsg = true, + { .name = "setitimer", .arg = { [0] = STRARRAY(which, itimers), }, }, - { .name = "setpgid", .errmsg = true, }, - { .name = "setrlimit", .errmsg = true, + { .name = "setrlimit", .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, - { .name = "setxattr", .errmsg = true, }, - { .name = "shutdown", .errmsg = true, }, - { .name = "socket", .errmsg = true, + { .name = "socket", .arg = { [0] = STRARRAY(family, socket_families), [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, }, - { .name = "socketpair", .errmsg = true, + { .name = "socketpair", .arg = { [0] = STRARRAY(family, socket_families), [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, }, - { .name = "stat", .errmsg = true, .alias = "newstat", }, - { .name = "statfs", .errmsg = true, }, - { .name = "statx", .errmsg = true, + { .name = "stat", .alias = "newstat", }, + { .name = "statx", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ }, [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } , [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, }, - { .name = "swapoff", .errmsg = true, + { .name = "swapoff", .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, - { .name = "swapon", .errmsg = true, + { .name = "swapon", .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, - { .name = "symlinkat", .errmsg = true, + { .name = "symlinkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, - { .name = "tgkill", .errmsg = true, + { .name = "tgkill", .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, - { .name = "tkill", .errmsg = true, + { .name = "tkill", .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, - { .name = "truncate", .errmsg = true, }, - { .name = "uname", .errmsg = true, .alias = "newuname", }, - { .name = "unlinkat", .errmsg = true, + { .name = "uname", .alias = "newuname", }, + { .name = "unlinkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, - { .name = "utime", .errmsg = true, }, - { .name = "utimensat", .errmsg = true, + { .name = "utimensat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, }, - { .name = "utimes", .errmsg = true, }, - { .name = "vmsplice", .errmsg = true, }, { .name = "wait4", .errpid = true, .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, { .name = "waitid", .errpid = true, .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, - { .name = "write", .errmsg = true, }, - { .name = "writev", .errmsg = true, }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -1708,14 +1657,18 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, } if (sc->fmt == NULL) { + if (ret < 0) + goto errno_print; signed_print: - fprintf(trace->output, ") = %ld", ret); - } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) { + fprintf(trace->output, ") %ld", ret); + } else if (ret < 0) { +errno_print: { char bf[STRERR_BUFSIZE]; const char *emsg = str_error_r(-ret, bf, sizeof(bf)), *e = audit_errno_to_name(-ret); fprintf(trace->output, ") = -1 %s %s", e, emsg); + } } else if (ret == 0 && sc->fmt->timeout) fprintf(trace->output, ") = 0 Timeout"); else if (ttrace->ret_scnprintf) { From 8b8ef2d74dec305b99dd43ec71d3ba2f502100b9 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 19 Jul 2017 04:31:32 +0800 Subject: [PATCH 063/253] perf report: Enable finding kernel inline functions Currently perf supports a mode to query inline stack. It works well for finding user space inline functions but it doesn't work for kernel ones, due to some unnecessary check. This patch removes these unnecessary checks. Now kernel inline functions can be reported. For example: perf report --inline -g func --stdio |--46.19%--do_huge_pmd_anonymous_page | do_huge_pmd_anonymous_page (inline) | __do_huge_pmd_anonymous_page (inline) | __SetPageUptodate (inline) | __set_bit (inline) The result is compared with the output of addr2line. They match. Signed-off-by: Yao Jin Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Milian Wolff Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1500409892-15904-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 3 --- tools/perf/ui/stdio/hist.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 69f4570bd4f9..f4bc2462bc2c 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -166,9 +166,6 @@ static struct inline_node *inline_node__create(struct map *map, u64 ip) if (dso == NULL) return NULL; - if (dso->kernel != DSO_TYPE_USER) - return NULL; - node = dso__parse_addr_inlines(dso, map__rip_2objdump(map, ip)); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 42e432bd2eb4..2df8eb1ed3c0 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -35,9 +35,6 @@ static size_t inline__fprintf(struct map *map, u64 ip, int left_margin, if (dso == NULL) return 0; - if (dso->kernel != DSO_TYPE_USER) - return 0; - node = dso__parse_addr_inlines(dso, map__rip_2objdump(map, ip)); if (node == NULL) From 6200e49423f8023eba54cf0b01076d6f8c0af6ae Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:34 -0700 Subject: [PATCH 064/253] perf header: Encapsulate read and swap Most callers of readn() in perf header read either a 32 or a 64 bits number, error check it and swap it, if necessary. Create do_read_u32 and do_read_u64 to simplify these use cases. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-2-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 210 ++++++++++++++------------------------- 1 file changed, 77 insertions(+), 133 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 76ed7d03e500..24e842c02027 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -119,25 +119,54 @@ static int do_write_string(int fd, const char *str) return write_padded(fd, str, olen, len); } +static int __do_read(int fd, void *addr, ssize_t size) +{ + ssize_t ret = readn(fd, addr, size); + + if (ret != size) + return ret < 0 ? (int)ret : -1; + return 0; +} + +static int do_read_u32(int fd, struct perf_header *ph, u32 *addr) +{ + int ret; + + ret = __do_read(fd, addr, sizeof(*addr)); + if (ret) + return ret; + + if (ph->needs_swap) + *addr = bswap_32(*addr); + return 0; +} + +static int do_read_u64(int fd, struct perf_header *ph, u64 *addr) +{ + int ret; + + ret = __do_read(fd, addr, sizeof(*addr)); + if (ret) + return ret; + + if (ph->needs_swap) + *addr = bswap_64(*addr); + return 0; +} + static char *do_read_string(int fd, struct perf_header *ph) { - ssize_t sz, ret; u32 len; char *buf; - sz = readn(fd, &len, sizeof(len)); - if (sz < (ssize_t)sizeof(len)) + if (do_read_u32(fd, ph, &len)) return NULL; - if (ph->needs_swap) - len = bswap_32(len); - buf = malloc(len); if (!buf) return NULL; - ret = readn(fd, buf, len); - if (ret == (ssize_t)len) { + if (!__do_read(fd, buf, len)) { /* * strings are padded by zeroes * thus the actual strlen of buf @@ -1188,24 +1217,15 @@ read_event_desc(struct perf_header *ph, int fd) u64 *id; void *buf = NULL; u32 nre, sz, nr, i, j; - ssize_t ret; size_t msz; /* number of events */ - ret = readn(fd, &nre, sizeof(nre)); - if (ret != (ssize_t)sizeof(nre)) + if (do_read_u32(fd, ph, &nre)) goto error; - if (ph->needs_swap) - nre = bswap_32(nre); - - ret = readn(fd, &sz, sizeof(sz)); - if (ret != (ssize_t)sizeof(sz)) + if (do_read_u32(fd, ph, &sz)) goto error; - if (ph->needs_swap) - sz = bswap_32(sz); - /* buffer to hold on file attr struct */ buf = malloc(sz); if (!buf) @@ -1227,8 +1247,7 @@ read_event_desc(struct perf_header *ph, int fd) * must read entire on-file attr struct to * sync up with layout. */ - ret = readn(fd, buf, sz); - if (ret != (ssize_t)sz) + if (__do_read(fd, buf, sz)) goto error; if (ph->needs_swap) @@ -1236,16 +1255,15 @@ read_event_desc(struct perf_header *ph, int fd) memcpy(&evsel->attr, buf, msz); - ret = readn(fd, &nr, sizeof(nr)); - if (ret != (ssize_t)sizeof(nr)) + if (do_read_u32(fd, ph, &nr)) goto error; - if (ph->needs_swap) { - nr = bswap_32(nr); + if (ph->needs_swap) evsel->needs_swap = true; - } evsel->name = do_read_string(fd, ph); + if (!evsel->name) + goto error; if (!nr) continue; @@ -1257,11 +1275,8 @@ read_event_desc(struct perf_header *ph, int fd) evsel->id = id; for (j = 0 ; j < nr; j++) { - ret = readn(fd, id, sizeof(*id)); - if (ret != (ssize_t)sizeof(*id)) + if (do_read_u64(fd, ph, id)) goto error; - if (ph->needs_swap) - *id = bswap_64(*id); id++; } } @@ -1641,26 +1656,18 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, struct perf_header *ph, int fd, void *data __maybe_unused) { - ssize_t ret; - u32 nr; + int ret; + u32 nr_cpus_avail, nr_cpus_online; - ret = readn(fd, &nr, sizeof(nr)); - if (ret != sizeof(nr)) - return -1; + ret = do_read_u32(fd, ph, &nr_cpus_avail); + if (ret) + return ret; - if (ph->needs_swap) - nr = bswap_32(nr); - - ph->env.nr_cpus_avail = nr; - - ret = readn(fd, &nr, sizeof(nr)); - if (ret != sizeof(nr)) - return -1; - - if (ph->needs_swap) - nr = bswap_32(nr); - - ph->env.nr_cpus_online = nr; + ret = do_read_u32(fd, ph, &nr_cpus_online); + if (ret) + return ret; + ph->env.nr_cpus_avail = (int)nr_cpus_avail; + ph->env.nr_cpus_online = (int)nr_cpus_online; return 0; } @@ -1684,17 +1691,13 @@ static int process_total_mem(struct perf_file_section *section __maybe_unused, struct perf_header *ph, int fd, void *data __maybe_unused) { - uint64_t mem; - ssize_t ret; + u64 total_mem; + int ret; - ret = readn(fd, &mem, sizeof(mem)); - if (ret != sizeof(mem)) + ret = do_read_u64(fd, ph, &total_mem); + if (ret) return -1; - - if (ph->needs_swap) - mem = bswap_64(mem); - - ph->env.total_mem = mem; + ph->env.total_mem = (unsigned long long)total_mem; return 0; } @@ -1754,17 +1757,12 @@ static int process_cmdline(struct perf_file_section *section, struct perf_header *ph, int fd, void *data __maybe_unused) { - ssize_t ret; char *str, *cmdline = NULL, **argv = NULL; u32 nr, i, len = 0; - ret = readn(fd, &nr, sizeof(nr)); - if (ret != sizeof(nr)) + if (do_read_u32(fd, ph, &nr)) return -1; - if (ph->needs_swap) - nr = bswap_32(nr); - ph->env.nr_cmdline = nr; cmdline = zalloc(section->size + nr + 1); @@ -1799,7 +1797,6 @@ static int process_cpu_topology(struct perf_file_section *section, struct perf_header *ph, int fd, void *data __maybe_unused) { - ssize_t ret; u32 nr, i; char *str; struct strbuf sb; @@ -1810,13 +1807,9 @@ static int process_cpu_topology(struct perf_file_section *section, if (!ph->env.cpu) return -1; - ret = readn(fd, &nr, sizeof(nr)); - if (ret != sizeof(nr)) + if (do_read_u32(fd, ph, &nr)) goto free_cpu; - if (ph->needs_swap) - nr = bswap_32(nr); - ph->env.nr_sibling_cores = nr; size += sizeof(u32); if (strbuf_init(&sb, 128) < 0) @@ -1835,13 +1828,9 @@ static int process_cpu_topology(struct perf_file_section *section, } ph->env.sibling_cores = strbuf_detach(&sb, NULL); - ret = readn(fd, &nr, sizeof(nr)); - if (ret != sizeof(nr)) + if (do_read_u32(fd, ph, &nr)) return -1; - if (ph->needs_swap) - nr = bswap_32(nr); - ph->env.nr_sibling_threads = nr; size += sizeof(u32); @@ -1868,22 +1857,14 @@ static int process_cpu_topology(struct perf_file_section *section, } for (i = 0; i < (u32)cpu_nr; i++) { - ret = readn(fd, &nr, sizeof(nr)); - if (ret != sizeof(nr)) + if (do_read_u32(fd, ph, &nr)) goto free_cpu; - if (ph->needs_swap) - nr = bswap_32(nr); - ph->env.cpu[i].core_id = nr; - ret = readn(fd, &nr, sizeof(nr)); - if (ret != sizeof(nr)) + if (do_read_u32(fd, ph, &nr)) goto free_cpu; - if (ph->needs_swap) - nr = bswap_32(nr); - if (nr != (u32)-1 && nr > (u32)cpu_nr) { pr_debug("socket_id number is too big." "You may need to upgrade the perf tool.\n"); @@ -1907,18 +1888,13 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse void *data __maybe_unused) { struct numa_node *nodes, *n; - ssize_t ret; u32 nr, i; char *str; /* nr nodes */ - ret = readn(fd, &nr, sizeof(nr)); - if (ret != sizeof(nr)) + if (do_read_u32(fd, ph, &nr)) return -1; - if (ph->needs_swap) - nr = bswap_32(nr); - nodes = zalloc(sizeof(*nodes) * nr); if (!nodes) return -ENOMEM; @@ -1927,24 +1903,15 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse n = &nodes[i]; /* node number */ - ret = readn(fd, &n->node, sizeof(u32)); - if (ret != sizeof(n->node)) + if (do_read_u32(fd, ph, &n->node)) goto error; - ret = readn(fd, &n->mem_total, sizeof(u64)); - if (ret != sizeof(u64)) + if (do_read_u64(fd, ph, &n->mem_total)) goto error; - ret = readn(fd, &n->mem_free, sizeof(u64)); - if (ret != sizeof(u64)) + if (do_read_u64(fd, ph, &n->mem_free)) goto error; - if (ph->needs_swap) { - n->node = bswap_32(n->node); - n->mem_total = bswap_64(n->mem_total); - n->mem_free = bswap_64(n->mem_free); - } - str = do_read_string(fd, ph); if (!str) goto error; @@ -1968,19 +1935,14 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused struct perf_header *ph, int fd, void *data __maybe_unused) { - ssize_t ret; char *name; u32 pmu_num; u32 type; struct strbuf sb; - ret = readn(fd, &pmu_num, sizeof(pmu_num)); - if (ret != sizeof(pmu_num)) + if (do_read_u32(fd, ph, &pmu_num)) return -1; - if (ph->needs_swap) - pmu_num = bswap_32(pmu_num); - if (!pmu_num) { pr_debug("pmu mappings not available\n"); return 0; @@ -1991,10 +1953,8 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused return -1; while (pmu_num) { - if (readn(fd, &type, sizeof(type)) != sizeof(type)) + if (do_read_u32(fd, ph, &type)) goto error; - if (ph->needs_swap) - type = bswap_32(type); name = do_read_string(fd, ph); if (!name) @@ -2034,12 +1994,9 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, u32 nr_members; } *desc; - if (readn(fd, &nr_groups, sizeof(nr_groups)) != sizeof(nr_groups)) + if (do_read_u32(fd, ph, &nr_groups)) return -1; - if (ph->needs_swap) - nr_groups = bswap_32(nr_groups); - ph->env.nr_groups = nr_groups; if (!nr_groups) { pr_debug("group desc not available\n"); @@ -2055,16 +2012,11 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, if (!desc[i].name) goto out_free; - if (readn(fd, &desc[i].leader_idx, sizeof(u32)) != sizeof(u32)) + if (do_read_u32(fd, ph, &desc[i].leader_idx)) goto out_free; - if (readn(fd, &desc[i].nr_members, sizeof(u32)) != sizeof(u32)) + if (do_read_u32(fd, ph, &desc[i].nr_members)) goto out_free; - - if (ph->needs_swap) { - desc[i].leader_idx = bswap_32(desc[i].leader_idx); - desc[i].nr_members = bswap_32(desc[i].nr_members); - } } /* @@ -2137,21 +2089,15 @@ static int process_cache(struct perf_file_section *section __maybe_unused, struct cpu_cache_level *caches; u32 cnt, i, version; - if (readn(fd, &version, sizeof(version)) != sizeof(version)) + if (do_read_u32(fd, ph, &version)) return -1; - if (ph->needs_swap) - version = bswap_32(version); - if (version != 1) return -1; - if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt)) + if (do_read_u32(fd, ph, &cnt)) return -1; - if (ph->needs_swap) - cnt = bswap_32(cnt); - caches = zalloc(sizeof(*caches) * cnt); if (!caches) return -1; @@ -2160,10 +2106,8 @@ static int process_cache(struct perf_file_section *section __maybe_unused, struct cpu_cache_level c; #define _R(v) \ - if (readn(fd, &c.v, sizeof(u32)) != sizeof(u32))\ + if (do_read_u32(fd, ph, &c.v))\ goto out_free_caches; \ - if (ph->needs_swap) \ - c.v = bswap_32(c.v); \ _R(level) _R(line_size) From dfaa1580efcfb6b9537043ba0447747d7179fb26 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:35 -0700 Subject: [PATCH 065/253] perf header: Add PROCESS_STR_FUN macro Simplify code by adding a macro to handle the common case of processing header features that are a simple string. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-3-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 65 +++++++++++----------------------------- 1 file changed, 17 insertions(+), 48 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 24e842c02027..3b833f06c4e7 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1603,6 +1603,23 @@ static int perf_header__read_build_ids(struct perf_header *header, return err; } +/* Macro for features that simply need to read and store a string. */ +#define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \ +static int process_##__feat(struct perf_file_section *section __maybe_unused, \ + struct perf_header *ph, int fd, \ + void *data __maybe_unused) \ +{\ + ph->env.__feat_env = do_read_string(fd, ph); \ + return ph->env.__feat_env ? 0 : -ENOMEM; \ +} + +FEAT_PROCESS_STR_FUN(hostname, hostname); +FEAT_PROCESS_STR_FUN(osrelease, os_release); +FEAT_PROCESS_STR_FUN(version, version); +FEAT_PROCESS_STR_FUN(arch, arch); +FEAT_PROCESS_STR_FUN(cpudesc, cpu_desc); +FEAT_PROCESS_STR_FUN(cpuid, cpuid); + static int process_tracing_data(struct perf_file_section *section __maybe_unused, struct perf_header *ph __maybe_unused, int fd, void *data) @@ -1620,38 +1637,6 @@ static int process_build_id(struct perf_file_section *section, return 0; } -static int process_hostname(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) -{ - ph->env.hostname = do_read_string(fd, ph); - return ph->env.hostname ? 0 : -ENOMEM; -} - -static int process_osrelease(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) -{ - ph->env.os_release = do_read_string(fd, ph); - return ph->env.os_release ? 0 : -ENOMEM; -} - -static int process_version(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) -{ - ph->env.version = do_read_string(fd, ph); - return ph->env.version ? 0 : -ENOMEM; -} - -static int process_arch(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) -{ - ph->env.arch = do_read_string(fd, ph); - return ph->env.arch ? 0 : -ENOMEM; -} - static int process_nrcpus(struct perf_file_section *section __maybe_unused, struct perf_header *ph, int fd, void *data __maybe_unused) @@ -1671,22 +1656,6 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, return 0; } -static int process_cpudesc(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) -{ - ph->env.cpu_desc = do_read_string(fd, ph); - return ph->env.cpu_desc ? 0 : -ENOMEM; -} - -static int process_cpuid(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) -{ - ph->env.cpuid = do_read_string(fd, ph); - return ph->env.cpuid ? 0 : -ENOMEM; -} - static int process_total_mem(struct perf_file_section *section __maybe_unused, struct perf_header *ph, int fd, void *data __maybe_unused) From 2ff5365d75e164032f64133914046fd6be213d94 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:36 -0700 Subject: [PATCH 066/253] perf header: Fail on write_padded error Do not proceed if write_padded() error failed. Also, add comments to remind that the return value of write_* functions in util/header.c is an errno code and not the number of bytes written. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-4-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 3b833f06c4e7..8dda19b68ac4 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -74,6 +74,7 @@ bool perf_header__has_feat(const struct perf_header *header, int feat) return test_bit(feat, header->adds_features); } +/* Return: 0 if succeded, -ERR if failed. */ static int do_write(int fd, const void *buf, size_t size) { while (size) { @@ -89,6 +90,7 @@ static int do_write(int fd, const void *buf, size_t size) return 0; } +/* Return: 0 if succeded, -ERR if failed. */ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned) { static const char zero_buf[NAME_ALIGN]; @@ -103,6 +105,7 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned) #define string_size(str) \ (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32)) +/* Return: 0 if succeded, -ERR if failed. */ static int do_write_string(int fd, const char *str) { u32 len, olen; @@ -3200,7 +3203,8 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, */ tracing_data_put(tdata); - write_padded(fd, NULL, 0, padding); + if (write_padded(fd, NULL, 0, padding)) + return -1; return aligned_size; } From 7c72440506e2108494bab3b68a4118fa61a9dbf4 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:37 -0700 Subject: [PATCH 067/253] perf util: Add const modifier to buf in "writen" function Make buf in helper function "writen" constant to simplify the life of its callers. This requires to hack a cast of buf prior to passing it to "ion" which is simpler than the alternative of reworking the "ion" function to provide a read and a write paths, the latter with constant buf argument. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-5-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 6 ++++-- tools/perf/util/util.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 9e4ea852f636..4c360daa4e24 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -281,6 +281,7 @@ static ssize_t ion(bool is_read, int fd, void *buf, size_t n) size_t left = n; while (left) { + /* buf must be treated as const if !is_read. */ ssize_t ret = is_read ? read(fd, buf, left) : write(fd, buf, left); @@ -308,9 +309,10 @@ ssize_t readn(int fd, void *buf, size_t n) /* * Write exactly 'n' bytes or return an error. */ -ssize_t writen(int fd, void *buf, size_t n) +ssize_t writen(int fd, const void *buf, size_t n) { - return ion(false, fd, buf, n); + /* ion does not modify buf. */ + return ion(false, fd, (void *)buf, n); } size_t hex_width(u64 v) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 062dd20437f7..b136c271125f 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -38,7 +38,7 @@ int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); ssize_t readn(int fd, void *buf, size_t n); -ssize_t writen(int fd, void *buf, size_t n); +ssize_t writen(int fd, const void *buf, size_t n); size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); From 3b8f51a677ce574f69671e3f7822b4d8f8634ef3 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:38 -0700 Subject: [PATCH 068/253] perf header: Revamp do_write() Now that writen takes a const buffer, use it in do_write instead of duplicating its functionality. Export do_write to use it consistently in header.c and build_id.c . Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-6-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 2 +- tools/perf/util/header.c | 14 +++++--------- tools/perf/util/header.h | 2 ++ 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e9665150e9b1..4baa5329439f 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -304,7 +304,7 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id, b.header.misc = misc; b.header.size = sizeof(b) + len; - err = writen(fd, &b, sizeof(b)); + err = do_write(fd, &b, sizeof(b)); if (err < 0) return err; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 8dda19b68ac4..954f0ef8e712 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -75,17 +75,13 @@ bool perf_header__has_feat(const struct perf_header *header, int feat) } /* Return: 0 if succeded, -ERR if failed. */ -static int do_write(int fd, const void *buf, size_t size) +int do_write(int fd, const void *buf, size_t size) { - while (size) { - int ret = write(fd, buf, size); + ssize_t ret; - if (ret < 0) - return -errno; - - size -= ret; - buf += ret; - } + ret = writen(fd, buf, size); + if (ret != (ssize_t)size) + return ret < 0 ? (int)ret : -1; return 0; } diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index d30109b421ee..e98489c8bba7 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -144,6 +144,8 @@ bool is_perf_magic(u64 magic); #define NAME_ALIGN 64 +int do_write(int fd, const void *buf, size_t size); + int write_padded(int fd, const void *bf, size_t count, size_t count_aligned); /* From ccebbeb6b69e4e172450d32f1059fefd1659ad8c Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:39 -0700 Subject: [PATCH 069/253] perf header: Add struct feat_fd for write Introduce struct feat_fd. This patch uses it as a wrapper around fd in write_* functions for feature headers. Next patches will extend its functionality to other feature header functions. This patch does not change behavior. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-7-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 8 +- tools/perf/util/build-id.h | 4 +- tools/perf/util/header.c | 230 ++++++++++++++++++++----------------- tools/perf/util/header.h | 7 +- 4 files changed, 138 insertions(+), 111 deletions(-) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 4baa5329439f..c1a06fcd7e70 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -289,7 +289,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, else static int write_buildid(const char *name, size_t name_len, u8 *build_id, - pid_t pid, u16 misc, int fd) + pid_t pid, u16 misc, struct feat_fd *fd) { int err; struct build_id_event b; @@ -311,7 +311,8 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id, return write_padded(fd, name, name_len + 1, len); } -static int machine__write_buildid_table(struct machine *machine, int fd) +static int machine__write_buildid_table(struct machine *machine, + struct feat_fd *fd) { int err = 0; char nm[PATH_MAX]; @@ -356,7 +357,8 @@ static int machine__write_buildid_table(struct machine *machine, int fd) return err; } -int perf_session__write_buildid_table(struct perf_session *session, int fd) +int perf_session__write_buildid_table(struct perf_session *session, + struct feat_fd *fd) { struct rb_node *nd; int err = machine__write_buildid_table(&session->machines.host, fd); diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 113dc0615c57..c94b0dcbfd74 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -10,6 +10,7 @@ extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; +struct feat_fd; int build_id__sprintf(const u8 *build_id, int len, char *bf); int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); @@ -27,7 +28,8 @@ int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, int dsos__hit_all(struct perf_session *session); bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); -int perf_session__write_buildid_table(struct perf_session *session, int fd); +int perf_session__write_buildid_table(struct perf_session *session, + struct feat_fd *fd); int perf_session__cache_build_ids(struct perf_session *session); char *build_id_cache__origname(const char *sbuild_id); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 954f0ef8e712..a5db9aded45b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -59,6 +59,11 @@ struct perf_file_attr { struct perf_file_section ids; }; +struct feat_fd { + struct perf_header *ph; + int fd; +}; + void perf_header__set_feat(struct perf_header *header, int feat) { set_bit(feat, header->adds_features); @@ -75,11 +80,11 @@ bool perf_header__has_feat(const struct perf_header *header, int feat) } /* Return: 0 if succeded, -ERR if failed. */ -int do_write(int fd, const void *buf, size_t size) +int do_write(struct feat_fd *ff, const void *buf, size_t size) { ssize_t ret; - ret = writen(fd, buf, size); + ret = writen(ff->fd, buf, size); if (ret != (ssize_t)size) return ret < 0 ? (int)ret : -1; @@ -87,13 +92,14 @@ int do_write(int fd, const void *buf, size_t size) } /* Return: 0 if succeded, -ERR if failed. */ -int write_padded(int fd, const void *bf, size_t count, size_t count_aligned) +int write_padded(struct feat_fd *ff, const void *bf, + size_t count, size_t count_aligned) { static const char zero_buf[NAME_ALIGN]; - int err = do_write(fd, bf, count); + int err = do_write(ff, bf, count); if (!err) - err = do_write(fd, zero_buf, count_aligned - count); + err = do_write(ff, zero_buf, count_aligned - count); return err; } @@ -102,7 +108,7 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned) (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32)) /* Return: 0 if succeded, -ERR if failed. */ -static int do_write_string(int fd, const char *str) +static int do_write_string(struct feat_fd *ff, const char *str) { u32 len, olen; int ret; @@ -111,11 +117,11 @@ static int do_write_string(int fd, const char *str) len = PERF_ALIGN(olen, NAME_ALIGN); /* write len, incl. \0 */ - ret = do_write(fd, &len, sizeof(len)); + ret = do_write(ff, &len, sizeof(len)); if (ret < 0) return ret; - return write_padded(fd, str, olen, len); + return write_padded(ff, str, olen, len); } static int __do_read(int fd, void *addr, ssize_t size) @@ -178,25 +184,24 @@ static char *do_read_string(int fd, struct perf_header *ph) return NULL; } -static int write_tracing_data(int fd, struct perf_header *h __maybe_unused, - struct perf_evlist *evlist) +static int write_tracing_data(struct feat_fd *ff, + struct perf_evlist *evlist) { - return read_tracing_data(fd, &evlist->entries); + return read_tracing_data(ff->fd, &evlist->entries); } - -static int write_build_id(int fd, struct perf_header *h, +static int write_build_id(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { struct perf_session *session; int err; - session = container_of(h, struct perf_session, header); + session = container_of(ff->ph, struct perf_session, header); if (!perf_session__read_build_ids(session, true)) return -1; - err = perf_session__write_buildid_table(session, fd); + err = perf_session__write_buildid_table(session, ff); if (err < 0) { pr_debug("failed to write buildid table\n"); return err; @@ -206,7 +211,7 @@ static int write_build_id(int fd, struct perf_header *h, return 0; } -static int write_hostname(int fd, struct perf_header *h __maybe_unused, +static int write_hostname(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { struct utsname uts; @@ -216,10 +221,10 @@ static int write_hostname(int fd, struct perf_header *h __maybe_unused, if (ret < 0) return -1; - return do_write_string(fd, uts.nodename); + return do_write_string(ff, uts.nodename); } -static int write_osrelease(int fd, struct perf_header *h __maybe_unused, +static int write_osrelease(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { struct utsname uts; @@ -229,10 +234,10 @@ static int write_osrelease(int fd, struct perf_header *h __maybe_unused, if (ret < 0) return -1; - return do_write_string(fd, uts.release); + return do_write_string(ff, uts.release); } -static int write_arch(int fd, struct perf_header *h __maybe_unused, +static int write_arch(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { struct utsname uts; @@ -242,16 +247,16 @@ static int write_arch(int fd, struct perf_header *h __maybe_unused, if (ret < 0) return -1; - return do_write_string(fd, uts.machine); + return do_write_string(ff, uts.machine); } -static int write_version(int fd, struct perf_header *h __maybe_unused, +static int write_version(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { - return do_write_string(fd, perf_version_string); + return do_write_string(ff, perf_version_string); } -static int __write_cpudesc(int fd, const char *cpuinfo_proc) +static int __write_cpudesc(struct feat_fd *ff, const char *cpuinfo_proc) { FILE *file; char *buf = NULL; @@ -301,14 +306,14 @@ static int __write_cpudesc(int fd, const char *cpuinfo_proc) } p++; } - ret = do_write_string(fd, s); + ret = do_write_string(ff, s); done: free(buf); fclose(file); return ret; } -static int write_cpudesc(int fd, struct perf_header *h __maybe_unused, +static int write_cpudesc(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { #ifndef CPUINFO_PROC @@ -319,7 +324,7 @@ static int write_cpudesc(int fd, struct perf_header *h __maybe_unused, for (i = 0; i < ARRAY_SIZE(cpuinfo_procs); i++) { int ret; - ret = __write_cpudesc(fd, cpuinfo_procs[i]); + ret = __write_cpudesc(ff, cpuinfo_procs[i]); if (ret >= 0) return ret; } @@ -327,7 +332,7 @@ static int write_cpudesc(int fd, struct perf_header *h __maybe_unused, } -static int write_nrcpus(int fd, struct perf_header *h __maybe_unused, +static int write_nrcpus(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { long nr; @@ -342,14 +347,14 @@ static int write_nrcpus(int fd, struct perf_header *h __maybe_unused, nra = (u32)(nr & UINT_MAX); - ret = do_write(fd, &nrc, sizeof(nrc)); + ret = do_write(ff, &nrc, sizeof(nrc)); if (ret < 0) return ret; - return do_write(fd, &nra, sizeof(nra)); + return do_write(ff, &nra, sizeof(nra)); } -static int write_event_desc(int fd, struct perf_header *h __maybe_unused, +static int write_event_desc(struct feat_fd *ff, struct perf_evlist *evlist) { struct perf_evsel *evsel; @@ -361,7 +366,7 @@ static int write_event_desc(int fd, struct perf_header *h __maybe_unused, /* * write number of events */ - ret = do_write(fd, &nre, sizeof(nre)); + ret = do_write(ff, &nre, sizeof(nre)); if (ret < 0) return ret; @@ -369,12 +374,12 @@ static int write_event_desc(int fd, struct perf_header *h __maybe_unused, * size of perf_event_attr struct */ sz = (u32)sizeof(evsel->attr); - ret = do_write(fd, &sz, sizeof(sz)); + ret = do_write(ff, &sz, sizeof(sz)); if (ret < 0) return ret; evlist__for_each_entry(evlist, evsel) { - ret = do_write(fd, &evsel->attr, sz); + ret = do_write(ff, &evsel->attr, sz); if (ret < 0) return ret; /* @@ -385,27 +390,27 @@ static int write_event_desc(int fd, struct perf_header *h __maybe_unused, * type of ids, */ nri = evsel->ids; - ret = do_write(fd, &nri, sizeof(nri)); + ret = do_write(ff, &nri, sizeof(nri)); if (ret < 0) return ret; /* * write event string as passed on cmdline */ - ret = do_write_string(fd, perf_evsel__name(evsel)); + ret = do_write_string(ff, perf_evsel__name(evsel)); if (ret < 0) return ret; /* * write unique ids for this event */ - ret = do_write(fd, evsel->id, evsel->ids * sizeof(u64)); + ret = do_write(ff, evsel->id, evsel->ids * sizeof(u64)); if (ret < 0) return ret; } return 0; } -static int write_cmdline(int fd, struct perf_header *h __maybe_unused, +static int write_cmdline(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { char buf[MAXPATHLEN]; @@ -423,16 +428,16 @@ static int write_cmdline(int fd, struct perf_header *h __maybe_unused, /* account for binary path */ n = perf_env.nr_cmdline + 1; - ret = do_write(fd, &n, sizeof(n)); + ret = do_write(ff, &n, sizeof(n)); if (ret < 0) return ret; - ret = do_write_string(fd, buf); + ret = do_write_string(ff, buf); if (ret < 0) return ret; for (i = 0 ; i < perf_env.nr_cmdline; i++) { - ret = do_write_string(fd, perf_env.cmdline_argv[i]); + ret = do_write_string(ff, perf_env.cmdline_argv[i]); if (ret < 0) return ret; } @@ -585,8 +590,8 @@ static struct cpu_topo *build_cpu_topology(void) return tp; } -static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused, - struct perf_evlist *evlist __maybe_unused) +static int write_cpu_topology(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) { struct cpu_topo *tp; u32 i; @@ -596,21 +601,21 @@ static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused, if (!tp) return -1; - ret = do_write(fd, &tp->core_sib, sizeof(tp->core_sib)); + ret = do_write(ff, &tp->core_sib, sizeof(tp->core_sib)); if (ret < 0) goto done; for (i = 0; i < tp->core_sib; i++) { - ret = do_write_string(fd, tp->core_siblings[i]); + ret = do_write_string(ff, tp->core_siblings[i]); if (ret < 0) goto done; } - ret = do_write(fd, &tp->thread_sib, sizeof(tp->thread_sib)); + ret = do_write(ff, &tp->thread_sib, sizeof(tp->thread_sib)); if (ret < 0) goto done; for (i = 0; i < tp->thread_sib; i++) { - ret = do_write_string(fd, tp->thread_siblings[i]); + ret = do_write_string(ff, tp->thread_siblings[i]); if (ret < 0) break; } @@ -620,11 +625,11 @@ static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused, goto done; for (j = 0; j < perf_env.nr_cpus_avail; j++) { - ret = do_write(fd, &perf_env.cpu[j].core_id, + ret = do_write(ff, &perf_env.cpu[j].core_id, sizeof(perf_env.cpu[j].core_id)); if (ret < 0) return ret; - ret = do_write(fd, &perf_env.cpu[j].socket_id, + ret = do_write(ff, &perf_env.cpu[j].socket_id, sizeof(perf_env.cpu[j].socket_id)); if (ret < 0) return ret; @@ -636,8 +641,8 @@ static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused, -static int write_total_mem(int fd, struct perf_header *h __maybe_unused, - struct perf_evlist *evlist __maybe_unused) +static int write_total_mem(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) { char *buf = NULL; FILE *fp; @@ -657,7 +662,7 @@ static int write_total_mem(int fd, struct perf_header *h __maybe_unused, if (!ret) { n = sscanf(buf, "%*s %"PRIu64, &mem); if (n == 1) - ret = do_write(fd, &mem, sizeof(mem)); + ret = do_write(ff, &mem, sizeof(mem)); } else ret = -1; free(buf); @@ -665,7 +670,7 @@ static int write_total_mem(int fd, struct perf_header *h __maybe_unused, return ret; } -static int write_topo_node(int fd, int node) +static int write_topo_node(struct feat_fd *ff, int node) { char str[MAXPATHLEN]; char field[32]; @@ -695,11 +700,11 @@ static int write_topo_node(int fd, int node) fclose(fp); fp = NULL; - ret = do_write(fd, &mem_total, sizeof(u64)); + ret = do_write(ff, &mem_total, sizeof(u64)); if (ret) goto done; - ret = do_write(fd, &mem_free, sizeof(u64)); + ret = do_write(ff, &mem_free, sizeof(u64)); if (ret) goto done; @@ -717,7 +722,7 @@ static int write_topo_node(int fd, int node) if (p) *p = '\0'; - ret = do_write_string(fd, buf); + ret = do_write_string(ff, buf); done: free(buf); if (fp) @@ -725,8 +730,8 @@ static int write_topo_node(int fd, int node) return ret; } -static int write_numa_topology(int fd, struct perf_header *h __maybe_unused, - struct perf_evlist *evlist __maybe_unused) +static int write_numa_topology(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) { char *buf = NULL; size_t len = 0; @@ -753,17 +758,17 @@ static int write_numa_topology(int fd, struct perf_header *h __maybe_unused, nr = (u32)node_map->nr; - ret = do_write(fd, &nr, sizeof(nr)); + ret = do_write(ff, &nr, sizeof(nr)); if (ret < 0) goto done; for (i = 0; i < nr; i++) { j = (u32)node_map->map[i]; - ret = do_write(fd, &j, sizeof(j)); + ret = do_write(ff, &j, sizeof(j)); if (ret < 0) break; - ret = write_topo_node(fd, i); + ret = write_topo_node(ff, i); if (ret < 0) break; } @@ -786,16 +791,16 @@ static int write_numa_topology(int fd, struct perf_header *h __maybe_unused, * }; */ -static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused, +static int write_pmu_mappings(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { struct perf_pmu *pmu = NULL; - off_t offset = lseek(fd, 0, SEEK_CUR); + off_t offset = lseek(ff->fd, 0, SEEK_CUR); __u32 pmu_num = 0; int ret; /* write real pmu_num later */ - ret = do_write(fd, &pmu_num, sizeof(pmu_num)); + ret = do_write(ff, &pmu_num, sizeof(pmu_num)); if (ret < 0) return ret; @@ -804,18 +809,18 @@ static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused, continue; pmu_num++; - ret = do_write(fd, &pmu->type, sizeof(pmu->type)); + ret = do_write(ff, &pmu->type, sizeof(pmu->type)); if (ret < 0) return ret; - ret = do_write_string(fd, pmu->name); + ret = do_write_string(ff, pmu->name); if (ret < 0) return ret; } - if (pwrite(fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) { + if (pwrite(ff->fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) { /* discard all */ - lseek(fd, offset, SEEK_SET); + lseek(ff->fd, offset, SEEK_SET); return -1; } @@ -834,14 +839,14 @@ static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused, * }[nr_groups]; * }; */ -static int write_group_desc(int fd, struct perf_header *h __maybe_unused, +static int write_group_desc(struct feat_fd *ff, struct perf_evlist *evlist) { u32 nr_groups = evlist->nr_groups; struct perf_evsel *evsel; int ret; - ret = do_write(fd, &nr_groups, sizeof(nr_groups)); + ret = do_write(ff, &nr_groups, sizeof(nr_groups)); if (ret < 0) return ret; @@ -852,15 +857,15 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused, u32 leader_idx = evsel->idx; u32 nr_members = evsel->nr_members; - ret = do_write_string(fd, name); + ret = do_write_string(ff, name); if (ret < 0) return ret; - ret = do_write(fd, &leader_idx, sizeof(leader_idx)); + ret = do_write(ff, &leader_idx, sizeof(leader_idx)); if (ret < 0) return ret; - ret = do_write(fd, &nr_members, sizeof(nr_members)); + ret = do_write(ff, &nr_members, sizeof(nr_members)); if (ret < 0) return ret; } @@ -877,7 +882,7 @@ int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) return -1; } -static int write_cpuid(int fd, struct perf_header *h __maybe_unused, +static int write_cpuid(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { char buffer[64]; @@ -889,25 +894,24 @@ static int write_cpuid(int fd, struct perf_header *h __maybe_unused, return -1; write_it: - return do_write_string(fd, buffer); + return do_write_string(ff, buffer); } -static int write_branch_stack(int fd __maybe_unused, - struct perf_header *h __maybe_unused, - struct perf_evlist *evlist __maybe_unused) +static int write_branch_stack(struct feat_fd *ff __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return 0; } -static int write_auxtrace(int fd, struct perf_header *h, +static int write_auxtrace(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { struct perf_session *session; int err; - session = container_of(h, struct perf_session, header); + session = container_of(ff->ph, struct perf_session, header); - err = auxtrace_index__write(fd, &session->auxtrace_index); + err = auxtrace_index__write(ff->fd, &session->auxtrace_index); if (err < 0) pr_err("Failed to write auxtrace index\n"); return err; @@ -1054,8 +1058,8 @@ static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp) #define MAX_CACHES 2000 -static int write_cache(int fd, struct perf_header *h __maybe_unused, - struct perf_evlist *evlist __maybe_unused) +static int write_cache(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) { struct cpu_cache_level caches[MAX_CACHES]; u32 cnt = 0, i, version = 1; @@ -1067,11 +1071,11 @@ static int write_cache(int fd, struct perf_header *h __maybe_unused, qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort); - ret = do_write(fd, &version, sizeof(u32)); + ret = do_write(ff, &version, sizeof(u32)); if (ret < 0) goto out; - ret = do_write(fd, &cnt, sizeof(u32)); + ret = do_write(ff, &cnt, sizeof(u32)); if (ret < 0) goto out; @@ -1079,7 +1083,7 @@ static int write_cache(int fd, struct perf_header *h __maybe_unused, struct cpu_cache_level *c = &caches[i]; #define _W(v) \ - ret = do_write(fd, &c->v, sizeof(u32)); \ + ret = do_write(ff, &c->v, sizeof(u32)); \ if (ret < 0) \ goto out; @@ -1090,7 +1094,7 @@ static int write_cache(int fd, struct perf_header *h __maybe_unused, #undef _W #define _W(v) \ - ret = do_write_string(fd, (const char *) c->v); \ + ret = do_write_string(ff, (const char *) c->v); \ if (ret < 0) \ goto out; @@ -1106,8 +1110,7 @@ static int write_cache(int fd, struct perf_header *h __maybe_unused, return ret; } -static int write_stat(int fd __maybe_unused, - struct perf_header *h __maybe_unused, +static int write_stat(struct feat_fd *ff __maybe_unused, struct perf_evlist *evlist __maybe_unused) { return 0; @@ -2105,7 +2108,7 @@ static int process_cache(struct perf_file_section *section __maybe_unused, } struct feature_ops { - int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); + int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); int (*process)(struct perf_file_section *section, struct perf_header *h, int fd, void *data); @@ -2214,29 +2217,29 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) return 0; } -static int do_write_feat(int fd, struct perf_header *h, int type, +static int do_write_feat(struct feat_fd *ff, int type, struct perf_file_section **p, struct perf_evlist *evlist) { int err; int ret = 0; - if (perf_header__has_feat(h, type)) { + if (perf_header__has_feat(ff->ph, type)) { if (!feat_ops[type].write) return -1; - (*p)->offset = lseek(fd, 0, SEEK_CUR); + (*p)->offset = lseek(ff->fd, 0, SEEK_CUR); - err = feat_ops[type].write(fd, h, evlist); + err = feat_ops[type].write(ff, evlist); if (err < 0) { pr_debug("failed to write feature %s\n", feat_ops[type].name); /* undo anything written */ - lseek(fd, (*p)->offset, SEEK_SET); + lseek(ff->fd, (*p)->offset, SEEK_SET); return -1; } - (*p)->size = lseek(fd, 0, SEEK_CUR) - (*p)->offset; + (*p)->size = lseek(ff->fd, 0, SEEK_CUR) - (*p)->offset; (*p)++; } return ret; @@ -2246,12 +2249,18 @@ static int perf_header__adds_write(struct perf_header *header, struct perf_evlist *evlist, int fd) { int nr_sections; + struct feat_fd ff; struct perf_file_section *feat_sec, *p; int sec_size; u64 sec_start; int feat; int err; + ff = (struct feat_fd){ + .fd = fd, + .ph = header, + }; + nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); if (!nr_sections) return 0; @@ -2266,7 +2275,7 @@ static int perf_header__adds_write(struct perf_header *header, lseek(fd, sec_start + sec_size, SEEK_SET); for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { - if (do_write_feat(fd, header, feat, &p, evlist)) + if (do_write_feat(&ff, feat, &p, evlist)) perf_header__clear_feat(header, feat); } @@ -2275,7 +2284,7 @@ static int perf_header__adds_write(struct perf_header *header, * may write more than needed due to dropped feature, but * this is okay, reader will skip the mising entries */ - err = do_write(fd, feat_sec, sec_size); + err = do_write(&ff, feat_sec, sec_size); if (err < 0) pr_debug("failed to write feature section\n"); free(feat_sec); @@ -2285,14 +2294,17 @@ static int perf_header__adds_write(struct perf_header *header, int perf_header__write_pipe(int fd) { struct perf_pipe_file_header f_header; + struct feat_fd ff; int err; + ff = (struct feat_fd){ .fd = fd }; + f_header = (struct perf_pipe_file_header){ .magic = PERF_MAGIC, .size = sizeof(f_header), }; - err = do_write(fd, &f_header, sizeof(f_header)); + err = do_write(&ff, &f_header, sizeof(f_header)); if (err < 0) { pr_debug("failed to write perf pipe header\n"); return err; @@ -2309,21 +2321,23 @@ int perf_session__write_header(struct perf_session *session, struct perf_file_attr f_attr; struct perf_header *header = &session->header; struct perf_evsel *evsel; + struct feat_fd ff; u64 attr_offset; int err; + ff = (struct feat_fd){ .fd = fd}; lseek(fd, sizeof(f_header), SEEK_SET); evlist__for_each_entry(session->evlist, evsel) { evsel->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(fd, evsel->id, evsel->ids * sizeof(u64)); + err = do_write(&ff, evsel->id, evsel->ids * sizeof(u64)); if (err < 0) { pr_debug("failed to write perf header\n"); return err; } } - attr_offset = lseek(fd, 0, SEEK_CUR); + attr_offset = lseek(ff.fd, 0, SEEK_CUR); evlist__for_each_entry(evlist, evsel) { f_attr = (struct perf_file_attr){ @@ -2333,7 +2347,7 @@ int perf_session__write_header(struct perf_session *session, .size = evsel->ids * sizeof(u64), } }; - err = do_write(fd, &f_attr, sizeof(f_attr)); + err = do_write(&ff, &f_attr, sizeof(f_attr)); if (err < 0) { pr_debug("failed to write perf header attribute\n"); return err; @@ -2368,7 +2382,7 @@ int perf_session__write_header(struct perf_session *session, memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features)); lseek(fd, 0, SEEK_SET); - err = do_write(fd, &f_header, sizeof(f_header)); + err = do_write(&ff, &f_header, sizeof(f_header)); if (err < 0) { pr_debug("failed to write perf header\n"); return err; @@ -2643,6 +2657,10 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, struct perf_header *ph, int fd, bool repipe) { + struct feat_fd ff = { + .fd = STDOUT_FILENO, + .ph = ph, + }; ssize_t ret; ret = readn(fd, header, sizeof(*header)); @@ -2657,7 +2675,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, if (ph->needs_swap) header->size = bswap_64(header->size); - if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) + if (repipe && do_write(&ff, header, sizeof(*header)) < 0) return -1; return 0; @@ -3165,6 +3183,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, union perf_event ev; struct tracing_data *tdata; ssize_t size = 0, aligned_size = 0, padding; + struct feat_fd ff; int err __maybe_unused = 0; /* @@ -3199,7 +3218,8 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, */ tracing_data_put(tdata); - if (write_padded(fd, NULL, 0, padding)) + ff = (struct feat_fd){ .fd = fd }; + if (write_padded(&ff, NULL, 0, padding)) return -1; return aligned_size; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index e98489c8bba7..9d8dcd5eb727 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -144,9 +144,12 @@ bool is_perf_magic(u64 magic); #define NAME_ALIGN 64 -int do_write(int fd, const void *buf, size_t size); +struct feat_fd; -int write_padded(int fd, const void *bf, size_t count, size_t count_aligned); +int do_write(struct feat_fd *fd, const void *buf, size_t size); + +int write_padded(struct feat_fd *fd, const void *bf, + size_t count, size_t count_aligned); /* * arch specific callback From cfc654209e27ecaa36f550a0934f3c78f9c9179f Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:40 -0700 Subject: [PATCH 070/253] perf header: Use struct feat_fd for print As preparation for using header records in pipe mode, replace int fd with struct feat_fd ff in print functions for all header record types. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-8-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 102 ++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 55 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a5db9aded45b..58e888ae13d4 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1116,62 +1116,56 @@ static int write_stat(struct feat_fd *ff __maybe_unused, return 0; } -static void print_hostname(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_hostname(struct feat_fd *ff, FILE *fp) { - fprintf(fp, "# hostname : %s\n", ph->env.hostname); + fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); } -static void print_osrelease(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_osrelease(struct feat_fd *ff, FILE *fp) { - fprintf(fp, "# os release : %s\n", ph->env.os_release); + fprintf(fp, "# os release : %s\n", ff->ph->env.os_release); } -static void print_arch(struct perf_header *ph, int fd __maybe_unused, FILE *fp) +static void print_arch(struct feat_fd *ff, FILE *fp) { - fprintf(fp, "# arch : %s\n", ph->env.arch); + fprintf(fp, "# arch : %s\n", ff->ph->env.arch); } -static void print_cpudesc(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_cpudesc(struct feat_fd *ff, FILE *fp) { - fprintf(fp, "# cpudesc : %s\n", ph->env.cpu_desc); + fprintf(fp, "# cpudesc : %s\n", ff->ph->env.cpu_desc); } -static void print_nrcpus(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_nrcpus(struct feat_fd *ff, FILE *fp) { - fprintf(fp, "# nrcpus online : %u\n", ph->env.nr_cpus_online); - fprintf(fp, "# nrcpus avail : %u\n", ph->env.nr_cpus_avail); + fprintf(fp, "# nrcpus online : %u\n", ff->ph->env.nr_cpus_online); + fprintf(fp, "# nrcpus avail : %u\n", ff->ph->env.nr_cpus_avail); } -static void print_version(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_version(struct feat_fd *ff, FILE *fp) { - fprintf(fp, "# perf version : %s\n", ph->env.version); + fprintf(fp, "# perf version : %s\n", ff->ph->env.version); } -static void print_cmdline(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_cmdline(struct feat_fd *ff, FILE *fp) { int nr, i; - nr = ph->env.nr_cmdline; + nr = ff->ph->env.nr_cmdline; fprintf(fp, "# cmdline : "); for (i = 0; i < nr; i++) - fprintf(fp, "%s ", ph->env.cmdline_argv[i]); + fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]); fputc('\n', fp); } -static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_cpu_topology(struct feat_fd *ff, FILE *fp) { + struct perf_header *ph = ff->ph; + int cpu_nr = ph->env.nr_cpus_avail; int nr, i; char *str; - int cpu_nr = ph->env.nr_cpus_avail; nr = ph->env.nr_sibling_cores; str = ph->env.sibling_cores; @@ -1297,9 +1291,9 @@ static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, ", %s = %s", name, val); } -static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) +static void print_event_desc(struct feat_fd *ff, FILE *fp) { - struct perf_evsel *evsel, *events = read_event_desc(ph, fd); + struct perf_evsel *evsel, *events = read_event_desc(ff->ph, ff->fd); u32 j; u64 *id; @@ -1329,20 +1323,18 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) free_event_desc(events); } -static void print_total_mem(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_total_mem(struct feat_fd *ff, FILE *fp) { - fprintf(fp, "# total memory : %Lu kB\n", ph->env.total_mem); + fprintf(fp, "# total memory : %llu kB\n", ff->ph->env.total_mem); } -static void print_numa_topology(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_numa_topology(struct feat_fd *ff, FILE *fp) { int i; struct numa_node *n; - for (i = 0; i < ph->env.nr_numa_nodes; i++) { - n = &ph->env.numa_nodes[i]; + for (i = 0; i < ff->ph->env.nr_numa_nodes; i++) { + n = &ff->ph->env.numa_nodes[i]; fprintf(fp, "# node%u meminfo : total = %"PRIu64" kB," " free = %"PRIu64" kB\n", @@ -1353,56 +1345,51 @@ static void print_numa_topology(struct perf_header *ph, int fd __maybe_unused, } } -static void print_cpuid(struct perf_header *ph, int fd __maybe_unused, FILE *fp) +static void print_cpuid(struct feat_fd *ff, FILE *fp) { - fprintf(fp, "# cpuid : %s\n", ph->env.cpuid); + fprintf(fp, "# cpuid : %s\n", ff->ph->env.cpuid); } -static void print_branch_stack(struct perf_header *ph __maybe_unused, - int fd __maybe_unused, FILE *fp) +static void print_branch_stack(struct feat_fd *ff __maybe_unused, FILE *fp) { fprintf(fp, "# contains samples with branch stack\n"); } -static void print_auxtrace(struct perf_header *ph __maybe_unused, - int fd __maybe_unused, FILE *fp) +static void print_auxtrace(struct feat_fd *ff __maybe_unused, FILE *fp) { fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n"); } -static void print_stat(struct perf_header *ph __maybe_unused, - int fd __maybe_unused, FILE *fp) +static void print_stat(struct feat_fd *ff __maybe_unused, FILE *fp) { fprintf(fp, "# contains stat data\n"); } -static void print_cache(struct perf_header *ph __maybe_unused, - int fd __maybe_unused, FILE *fp __maybe_unused) +static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused) { int i; fprintf(fp, "# CPU cache info:\n"); - for (i = 0; i < ph->env.caches_cnt; i++) { + for (i = 0; i < ff->ph->env.caches_cnt; i++) { fprintf(fp, "# "); - cpu_cache_level__fprintf(fp, &ph->env.caches[i]); + cpu_cache_level__fprintf(fp, &ff->ph->env.caches[i]); } } -static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { const char *delimiter = "# pmu mappings: "; char *str, *tmp; u32 pmu_num; u32 type; - pmu_num = ph->env.nr_pmu_mappings; + pmu_num = ff->ph->env.nr_pmu_mappings; if (!pmu_num) { fprintf(fp, "# pmu mappings: not available\n"); return; } - str = ph->env.pmu_mappings; + str = ff->ph->env.pmu_mappings; while (pmu_num) { type = strtoul(str, &tmp, 0); @@ -1425,14 +1412,13 @@ static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused, fprintf(fp, "# pmu mappings: unable to read\n"); } -static void print_group_desc(struct perf_header *ph, int fd __maybe_unused, - FILE *fp) +static void print_group_desc(struct feat_fd *ff, FILE *fp) { struct perf_session *session; struct perf_evsel *evsel; u32 nr = 0; - session = container_of(ph, struct perf_session, header); + session = container_of(ff->ph, struct perf_session, header); evlist__for_each_entry(session->evlist, evsel) { if (perf_evsel__is_group_leader(evsel) && @@ -2109,7 +2095,7 @@ static int process_cache(struct perf_file_section *section __maybe_unused, struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); - void (*print)(struct perf_header *h, int fd, FILE *fp); + void (*print)(struct feat_fd *ff, FILE *fp); int (*process)(struct perf_file_section *section, struct perf_header *h, int fd, void *data); const char *name; @@ -2162,6 +2148,7 @@ static int perf_file_section__fprintf_info(struct perf_file_section *section, int feat, int fd, void *data) { struct header_print_data *hd = data; + struct feat_fd ff; if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { pr_debug("Failed to lseek to %" PRIu64 " offset for feature " @@ -2175,8 +2162,13 @@ static int perf_file_section__fprintf_info(struct perf_file_section *section, if (!feat_ops[feat].print) return 0; + ff = (struct feat_fd) { + .fd = fd, + .ph = ph, + }; + if (!feat_ops[feat].full_only || hd->full) - feat_ops[feat].print(ph, fd, hd->fp); + feat_ops[feat].print(&ff, hd->fp); else fprintf(hd->fp, "# %s info available, use -I to display\n", feat_ops[feat].name); From 1a22275449f4bd6255e24f0c5b6c7fa61b263417 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:41 -0700 Subject: [PATCH 071/253] perf header: Use struct feat_fd to process header records As preparation for using header records in pipe-mode, replace int fd with struct feat_fd ff in process functions for all header record types. This patch does not change behavior. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-9-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 164 +++++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 85 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 58e888ae13d4..22b52e5636a6 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1594,11 +1594,10 @@ static int perf_header__read_build_ids(struct perf_header *header, /* Macro for features that simply need to read and store a string. */ #define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \ static int process_##__feat(struct perf_file_section *section __maybe_unused, \ - struct perf_header *ph, int fd, \ - void *data __maybe_unused) \ + struct feat_fd *ff, void *data __maybe_unused) \ {\ - ph->env.__feat_env = do_read_string(fd, ph); \ - return ph->env.__feat_env ? 0 : -ENOMEM; \ + ff->ph->env.__feat_env = do_read_string(ff->fd, ff->ph); \ + return ff->ph->env.__feat_env ? 0 : -ENOMEM; \ } FEAT_PROCESS_STR_FUN(hostname, hostname); @@ -1609,52 +1608,49 @@ FEAT_PROCESS_STR_FUN(cpudesc, cpu_desc); FEAT_PROCESS_STR_FUN(cpuid, cpuid); static int process_tracing_data(struct perf_file_section *section __maybe_unused, - struct perf_header *ph __maybe_unused, - int fd, void *data) + struct feat_fd *ff, void *data) { - ssize_t ret = trace_report(fd, data, false); + ssize_t ret = trace_report(ff->fd, data, false); + return ret < 0 ? -1 : 0; } static int process_build_id(struct perf_file_section *section, - struct perf_header *ph, int fd, - void *data __maybe_unused) + struct feat_fd *ff, void *data __maybe_unused) { - if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) + if (perf_header__read_build_ids(ff->ph, ff->fd, section->offset, section->size)) pr_debug("Failed to read buildids, continuing...\n"); return 0; } static int process_nrcpus(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) + struct feat_fd *ff, void *data __maybe_unused) { int ret; u32 nr_cpus_avail, nr_cpus_online; - ret = do_read_u32(fd, ph, &nr_cpus_avail); + ret = do_read_u32(ff->fd, ff->ph, &nr_cpus_avail); if (ret) return ret; - ret = do_read_u32(fd, ph, &nr_cpus_online); + ret = do_read_u32(ff->fd, ff->ph, &nr_cpus_online); if (ret) return ret; - ph->env.nr_cpus_avail = (int)nr_cpus_avail; - ph->env.nr_cpus_online = (int)nr_cpus_online; + ff->ph->env.nr_cpus_avail = (int)nr_cpus_avail; + ff->ph->env.nr_cpus_online = (int)nr_cpus_online; return 0; } static int process_total_mem(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) + struct feat_fd *ff, void *data __maybe_unused) { u64 total_mem; int ret; - ret = do_read_u64(fd, ph, &total_mem); + ret = do_read_u64(ff->fd, ff->ph, &total_mem); if (ret) return -1; - ph->env.total_mem = (unsigned long long)total_mem; + ff->ph->env.total_mem = (unsigned long long)total_mem; return 0; } @@ -1692,16 +1688,15 @@ perf_evlist__set_event_name(struct perf_evlist *evlist, static int process_event_desc(struct perf_file_section *section __maybe_unused, - struct perf_header *header, int fd, - void *data __maybe_unused) + struct feat_fd *ff, void *data __maybe_unused) { struct perf_session *session; - struct perf_evsel *evsel, *events = read_event_desc(header, fd); + struct perf_evsel *evsel, *events = read_event_desc(ff->ph, ff->fd); if (!events) return 0; - session = container_of(header, struct perf_session, header); + session = container_of(ff->ph, struct perf_session, header); for (evsel = events; evsel->attr.size; evsel++) perf_evlist__set_event_name(session->evlist, evsel); @@ -1710,17 +1705,16 @@ process_event_desc(struct perf_file_section *section __maybe_unused, return 0; } -static int process_cmdline(struct perf_file_section *section, - struct perf_header *ph, int fd, - void *data __maybe_unused) +static int process_cmdline(struct perf_file_section *section __maybe_unused, + struct feat_fd *ff, void *data __maybe_unused) { char *str, *cmdline = NULL, **argv = NULL; u32 nr, i, len = 0; - if (do_read_u32(fd, ph, &nr)) + if (do_read_u32(ff->fd, ff->ph, &nr)) return -1; - ph->env.nr_cmdline = nr; + ff->ph->env.nr_cmdline = nr; cmdline = zalloc(section->size + nr + 1); if (!cmdline) @@ -1731,7 +1725,7 @@ static int process_cmdline(struct perf_file_section *section, goto error; for (i = 0; i < nr; i++) { - str = do_read_string(fd, ph); + str = do_read_string(ff->fd, ff->ph); if (!str) goto error; @@ -1740,8 +1734,8 @@ static int process_cmdline(struct perf_file_section *section, len += strlen(str) + 1; free(str); } - ph->env.cmdline = cmdline; - ph->env.cmdline_argv = (const char **) argv; + ff->ph->env.cmdline = cmdline; + ff->ph->env.cmdline_argv = (const char **) argv; return 0; error: @@ -1750,21 +1744,21 @@ static int process_cmdline(struct perf_file_section *section, return -1; } -static int process_cpu_topology(struct perf_file_section *section, - struct perf_header *ph, int fd, - void *data __maybe_unused) +static int process_cpu_topology(struct perf_file_section *section __maybe_unused, + struct feat_fd *ff, void *data __maybe_unused) { u32 nr, i; char *str; struct strbuf sb; - int cpu_nr = ph->env.nr_cpus_avail; + int cpu_nr = ff->ph->env.nr_cpus_avail; u64 size = 0; + struct perf_header *ph = ff->ph; ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); if (!ph->env.cpu) return -1; - if (do_read_u32(fd, ph, &nr)) + if (do_read_u32(ff->fd, ph, &nr)) goto free_cpu; ph->env.nr_sibling_cores = nr; @@ -1773,7 +1767,7 @@ static int process_cpu_topology(struct perf_file_section *section, goto free_cpu; for (i = 0; i < nr; i++) { - str = do_read_string(fd, ph); + str = do_read_string(ff->fd, ph); if (!str) goto error; @@ -1785,14 +1779,14 @@ static int process_cpu_topology(struct perf_file_section *section, } ph->env.sibling_cores = strbuf_detach(&sb, NULL); - if (do_read_u32(fd, ph, &nr)) + if (do_read_u32(ff->fd, ph, &nr)) return -1; ph->env.nr_sibling_threads = nr; size += sizeof(u32); for (i = 0; i < nr; i++) { - str = do_read_string(fd, ph); + str = do_read_string(ff->fd, ph); if (!str) goto error; @@ -1814,12 +1808,12 @@ static int process_cpu_topology(struct perf_file_section *section, } for (i = 0; i < (u32)cpu_nr; i++) { - if (do_read_u32(fd, ph, &nr)) + if (do_read_u32(ff->fd, ph, &nr)) goto free_cpu; ph->env.cpu[i].core_id = nr; - if (do_read_u32(fd, ph, &nr)) + if (do_read_u32(ff->fd, ph, &nr)) goto free_cpu; if (nr != (u32)-1 && nr > (u32)cpu_nr) { @@ -1841,15 +1835,14 @@ static int process_cpu_topology(struct perf_file_section *section, } static int process_numa_topology(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) + struct feat_fd *ff, void *data __maybe_unused) { struct numa_node *nodes, *n; u32 nr, i; char *str; /* nr nodes */ - if (do_read_u32(fd, ph, &nr)) + if (do_read_u32(ff->fd, ff->ph, &nr)) return -1; nodes = zalloc(sizeof(*nodes) * nr); @@ -1860,16 +1853,16 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse n = &nodes[i]; /* node number */ - if (do_read_u32(fd, ph, &n->node)) + if (do_read_u32(ff->fd, ff->ph, &n->node)) goto error; - if (do_read_u64(fd, ph, &n->mem_total)) + if (do_read_u64(ff->fd, ff->ph, &n->mem_total)) goto error; - if (do_read_u64(fd, ph, &n->mem_free)) + if (do_read_u64(ff->fd, ff->ph, &n->mem_free)) goto error; - str = do_read_string(fd, ph); + str = do_read_string(ff->fd, ff->ph); if (!str) goto error; @@ -1879,8 +1872,8 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse free(str); } - ph->env.nr_numa_nodes = nr; - ph->env.numa_nodes = nodes; + ff->ph->env.nr_numa_nodes = nr; + ff->ph->env.numa_nodes = nodes; return 0; error: @@ -1889,15 +1882,14 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse } static int process_pmu_mappings(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) + struct feat_fd *ff, void *data __maybe_unused) { char *name; u32 pmu_num; u32 type; struct strbuf sb; - if (do_read_u32(fd, ph, &pmu_num)) + if (do_read_u32(ff->fd, ff->ph, &pmu_num)) return -1; if (!pmu_num) { @@ -1905,15 +1897,15 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused return 0; } - ph->env.nr_pmu_mappings = pmu_num; + ff->ph->env.nr_pmu_mappings = pmu_num; if (strbuf_init(&sb, 128) < 0) return -1; while (pmu_num) { - if (do_read_u32(fd, ph, &type)) + if (do_read_u32(ff->fd, ff->ph, &type)) goto error; - name = do_read_string(fd, ph); + name = do_read_string(ff->fd, ff->ph); if (!name) goto error; @@ -1924,12 +1916,12 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused goto error; if (!strcmp(name, "msr")) - ph->env.msr_pmu_type = type; + ff->ph->env.msr_pmu_type = type; free(name); pmu_num--; } - ph->env.pmu_mappings = strbuf_detach(&sb, NULL); + ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL); return 0; error: @@ -1938,8 +1930,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused } static int process_group_desc(struct perf_file_section *section __maybe_unused, - struct perf_header *ph, int fd, - void *data __maybe_unused) + struct feat_fd *ff, void *data __maybe_unused) { size_t ret = -1; u32 i, nr, nr_groups; @@ -1951,10 +1942,10 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, u32 nr_members; } *desc; - if (do_read_u32(fd, ph, &nr_groups)) + if (do_read_u32(ff->fd, ff->ph, &nr_groups)) return -1; - ph->env.nr_groups = nr_groups; + ff->ph->env.nr_groups = nr_groups; if (!nr_groups) { pr_debug("group desc not available\n"); return 0; @@ -1965,21 +1956,21 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, return -1; for (i = 0; i < nr_groups; i++) { - desc[i].name = do_read_string(fd, ph); + desc[i].name = do_read_string(ff->fd, ff->ph); if (!desc[i].name) goto out_free; - if (do_read_u32(fd, ph, &desc[i].leader_idx)) + if (do_read_u32(ff->fd, ff->ph, &desc[i].leader_idx)) goto out_free; - if (do_read_u32(fd, ph, &desc[i].nr_members)) + if (do_read_u32(ff->fd, ff->ph, &desc[i].nr_members)) goto out_free; } /* * Rebuild group relationship based on the group_desc */ - session = container_of(ph, struct perf_session, header); + session = container_of(ff->ph, struct perf_session, header); session->evlist->nr_groups = nr_groups; i = nr = 0; @@ -2023,36 +2014,34 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, return ret; } -static int process_auxtrace(struct perf_file_section *section, - struct perf_header *ph, int fd, - void *data __maybe_unused) +static int process_auxtrace(struct perf_file_section *section __maybe_unused, + struct feat_fd *ff, void *data __maybe_unused) { struct perf_session *session; int err; - session = container_of(ph, struct perf_session, header); + session = container_of(ff->ph, struct perf_session, header); - err = auxtrace_index__process(fd, section->size, session, - ph->needs_swap); + err = auxtrace_index__process(ff->fd, section->size, session, + ff->ph->needs_swap); if (err < 0) pr_err("Failed to process auxtrace index\n"); return err; } static int process_cache(struct perf_file_section *section __maybe_unused, - struct perf_header *ph __maybe_unused, int fd __maybe_unused, - void *data __maybe_unused) + struct feat_fd *ff, void *data __maybe_unused) { struct cpu_cache_level *caches; u32 cnt, i, version; - if (do_read_u32(fd, ph, &version)) + if (do_read_u32(ff->fd, ff->ph, &version)) return -1; if (version != 1) return -1; - if (do_read_u32(fd, ph, &cnt)) + if (do_read_u32(ff->fd, ff->ph, &cnt)) return -1; caches = zalloc(sizeof(*caches) * cnt); @@ -2063,7 +2052,7 @@ static int process_cache(struct perf_file_section *section __maybe_unused, struct cpu_cache_level c; #define _R(v) \ - if (do_read_u32(fd, ph, &c.v))\ + if (do_read_u32(ff->fd, ff->ph, &c.v))\ goto out_free_caches; \ _R(level) @@ -2072,9 +2061,9 @@ static int process_cache(struct perf_file_section *section __maybe_unused, _R(ways) #undef _R - #define _R(v) \ - c.v = do_read_string(fd, ph); \ - if (!c.v) \ + #define _R(v) \ + c.v = do_read_string(ff->fd, ff->ph); \ + if (!c.v) \ goto out_free_caches; _R(type) @@ -2085,8 +2074,8 @@ static int process_cache(struct perf_file_section *section __maybe_unused, caches[i] = c; } - ph->env.caches = caches; - ph->env.caches_cnt = cnt; + ff->ph->env.caches = caches; + ff->ph->env.caches_cnt = cnt; return 0; out_free_caches: free(caches); @@ -2097,7 +2086,7 @@ struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); int (*process)(struct perf_file_section *section, - struct perf_header *h, int fd, void *data); + struct feat_fd *ff, void *data); const char *name; bool full_only; }; @@ -2628,6 +2617,11 @@ static int perf_file_section__process(struct perf_file_section *section, struct perf_header *ph, int feat, int fd, void *data) { + struct feat_fd ff = { + .fd = fd, + .ph = ph, + }; + if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { pr_debug("Failed to lseek to %" PRIu64 " offset for feature " "%d, continuing...\n", section->offset, feat); @@ -2642,7 +2636,7 @@ static int perf_file_section__process(struct perf_file_section *section, if (!feat_ops[feat].process) return 0; - return feat_ops[feat].process(section, ph, fd, data); + return feat_ops[feat].process(section, &ff, data); } static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, From 625524572391326b83f906efe02abbaac9debce6 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:42 -0700 Subject: [PATCH 072/253] perf header: Don't pass struct perf_file_section to process_##_feat struct perf_file_section is used in process_##_feat as container for size and offset in the file descriptor. These attributes are meaninful in pipe-mode but struct perf_file_section is not. Add offset and size variables to struct feat_fd to store perf_file_section's values in file-mode. Later on, the same variables can be reused for pipe-mode. This patch does not change behavior. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-10-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 58 +++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 22b52e5636a6..60394d27e8f5 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -62,6 +62,8 @@ struct perf_file_attr { struct feat_fd { struct perf_header *ph; int fd; + ssize_t offset; + size_t size; }; void perf_header__set_feat(struct perf_header *header, int feat) @@ -1593,8 +1595,7 @@ static int perf_header__read_build_ids(struct perf_header *header, /* Macro for features that simply need to read and store a string. */ #define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \ -static int process_##__feat(struct perf_file_section *section __maybe_unused, \ - struct feat_fd *ff, void *data __maybe_unused) \ +static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \ {\ ff->ph->env.__feat_env = do_read_string(ff->fd, ff->ph); \ return ff->ph->env.__feat_env ? 0 : -ENOMEM; \ @@ -1607,24 +1608,21 @@ FEAT_PROCESS_STR_FUN(arch, arch); FEAT_PROCESS_STR_FUN(cpudesc, cpu_desc); FEAT_PROCESS_STR_FUN(cpuid, cpuid); -static int process_tracing_data(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data) +static int process_tracing_data(struct feat_fd *ff, void *data) { ssize_t ret = trace_report(ff->fd, data, false); return ret < 0 ? -1 : 0; } -static int process_build_id(struct perf_file_section *section, - struct feat_fd *ff, void *data __maybe_unused) +static int process_build_id(struct feat_fd *ff, void *data __maybe_unused) { - if (perf_header__read_build_ids(ff->ph, ff->fd, section->offset, section->size)) + if (perf_header__read_build_ids(ff->ph, ff->fd, ff->offset, ff->size)) pr_debug("Failed to read buildids, continuing...\n"); return 0; } -static int process_nrcpus(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data __maybe_unused) +static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused) { int ret; u32 nr_cpus_avail, nr_cpus_online; @@ -1641,8 +1639,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, return 0; } -static int process_total_mem(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data __maybe_unused) +static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused) { u64 total_mem; int ret; @@ -1687,8 +1684,7 @@ perf_evlist__set_event_name(struct perf_evlist *evlist, } static int -process_event_desc(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data __maybe_unused) +process_event_desc(struct feat_fd *ff, void *data __maybe_unused) { struct perf_session *session; struct perf_evsel *evsel, *events = read_event_desc(ff->ph, ff->fd); @@ -1705,8 +1701,7 @@ process_event_desc(struct perf_file_section *section __maybe_unused, return 0; } -static int process_cmdline(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data __maybe_unused) +static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused) { char *str, *cmdline = NULL, **argv = NULL; u32 nr, i, len = 0; @@ -1716,7 +1711,7 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused, ff->ph->env.nr_cmdline = nr; - cmdline = zalloc(section->size + nr + 1); + cmdline = zalloc(ff->size + nr + 1); if (!cmdline) return -1; @@ -1744,8 +1739,7 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused, return -1; } -static int process_cpu_topology(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data __maybe_unused) +static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) { u32 nr, i; char *str; @@ -1802,7 +1796,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused * The header may be from old perf, * which doesn't include core id and socket id information. */ - if (section->size <= size) { + if (ff->size <= size) { zfree(&ph->env.cpu); return 0; } @@ -1834,8 +1828,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused return -1; } -static int process_numa_topology(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data __maybe_unused) +static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused) { struct numa_node *nodes, *n; u32 nr, i; @@ -1881,8 +1874,7 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse return -1; } -static int process_pmu_mappings(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data __maybe_unused) +static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) { char *name; u32 pmu_num; @@ -1929,8 +1921,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused return -1; } -static int process_group_desc(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data __maybe_unused) +static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) { size_t ret = -1; u32 i, nr, nr_groups; @@ -2014,23 +2005,21 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, return ret; } -static int process_auxtrace(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data __maybe_unused) +static int process_auxtrace(struct feat_fd *ff, void *data __maybe_unused) { struct perf_session *session; int err; session = container_of(ff->ph, struct perf_session, header); - err = auxtrace_index__process(ff->fd, section->size, session, + err = auxtrace_index__process(ff->fd, ff->size, session, ff->ph->needs_swap); if (err < 0) pr_err("Failed to process auxtrace index\n"); return err; } -static int process_cache(struct perf_file_section *section __maybe_unused, - struct feat_fd *ff, void *data __maybe_unused) +static int process_cache(struct feat_fd *ff, void *data __maybe_unused) { struct cpu_cache_level *caches; u32 cnt, i, version; @@ -2085,8 +2074,7 @@ static int process_cache(struct perf_file_section *section __maybe_unused, struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); - int (*process)(struct perf_file_section *section, - struct feat_fd *ff, void *data); + int (*process)(struct feat_fd *ff, void *data); const char *name; bool full_only; }; @@ -2617,9 +2605,11 @@ static int perf_file_section__process(struct perf_file_section *section, struct perf_header *ph, int feat, int fd, void *data) { - struct feat_fd ff = { + struct feat_fd fdd = { .fd = fd, .ph = ph, + .size = section->size, + .offset = section->offset, }; if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { @@ -2636,7 +2626,7 @@ static int perf_file_section__process(struct perf_file_section *section, if (!feat_ops[feat].process) return 0; - return feat_ops[feat].process(section, &ff, data); + return feat_ops[feat].process(&fdd, data); } static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, From 48e5fcea386009fb2515158fdaf8586ce72d86ce Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:43 -0700 Subject: [PATCH 073/253] perf header: Use struct feat_fd in read header records As preparation for using header records in-pipe mode, replace int fd with struct feat_fd ff in read functions for all header record types. This patch does not change behavior. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-11-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 101 +++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 51 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 60394d27e8f5..14db9f204b0a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -126,54 +126,54 @@ static int do_write_string(struct feat_fd *ff, const char *str) return write_padded(ff, str, olen, len); } -static int __do_read(int fd, void *addr, ssize_t size) +static int __do_read(struct feat_fd *ff, void *addr, ssize_t size) { - ssize_t ret = readn(fd, addr, size); + ssize_t ret = readn(ff->fd, addr, size); if (ret != size) return ret < 0 ? (int)ret : -1; return 0; } -static int do_read_u32(int fd, struct perf_header *ph, u32 *addr) +static int do_read_u32(struct feat_fd *ff, u32 *addr) { int ret; - ret = __do_read(fd, addr, sizeof(*addr)); + ret = __do_read(ff, addr, sizeof(*addr)); if (ret) return ret; - if (ph->needs_swap) + if (ff->ph->needs_swap) *addr = bswap_32(*addr); return 0; } -static int do_read_u64(int fd, struct perf_header *ph, u64 *addr) +static int do_read_u64(struct feat_fd *ff, u64 *addr) { int ret; - ret = __do_read(fd, addr, sizeof(*addr)); + ret = __do_read(ff, addr, sizeof(*addr)); if (ret) return ret; - if (ph->needs_swap) + if (ff->ph->needs_swap) *addr = bswap_64(*addr); return 0; } -static char *do_read_string(int fd, struct perf_header *ph) +static char *do_read_string(struct feat_fd *ff) { u32 len; char *buf; - if (do_read_u32(fd, ph, &len)) + if (do_read_u32(ff, &len)) return NULL; buf = malloc(len); if (!buf) return NULL; - if (!__do_read(fd, buf, len)) { + if (!__do_read(ff, buf, len)) { /* * strings are padded by zeroes * thus the actual strlen of buf @@ -1208,8 +1208,7 @@ static void free_event_desc(struct perf_evsel *events) free(events); } -static struct perf_evsel * -read_event_desc(struct perf_header *ph, int fd) +static struct perf_evsel *read_event_desc(struct feat_fd *ff) { struct perf_evsel *evsel, *events = NULL; u64 *id; @@ -1218,10 +1217,10 @@ read_event_desc(struct perf_header *ph, int fd) size_t msz; /* number of events */ - if (do_read_u32(fd, ph, &nre)) + if (do_read_u32(ff, &nre)) goto error; - if (do_read_u32(fd, ph, &sz)) + if (do_read_u32(ff, &sz)) goto error; /* buffer to hold on file attr struct */ @@ -1245,21 +1244,21 @@ read_event_desc(struct perf_header *ph, int fd) * must read entire on-file attr struct to * sync up with layout. */ - if (__do_read(fd, buf, sz)) + if (__do_read(ff, buf, sz)) goto error; - if (ph->needs_swap) + if (ff->ph->needs_swap) perf_event__attr_swap(buf); memcpy(&evsel->attr, buf, msz); - if (do_read_u32(fd, ph, &nr)) + if (do_read_u32(ff, &nr)) goto error; - if (ph->needs_swap) + if (ff->ph->needs_swap) evsel->needs_swap = true; - evsel->name = do_read_string(fd, ph); + evsel->name = do_read_string(ff); if (!evsel->name) goto error; @@ -1273,7 +1272,7 @@ read_event_desc(struct perf_header *ph, int fd) evsel->id = id; for (j = 0 ; j < nr; j++) { - if (do_read_u64(fd, ph, id)) + if (do_read_u64(ff, id)) goto error; id++; } @@ -1295,7 +1294,7 @@ static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val, static void print_event_desc(struct feat_fd *ff, FILE *fp) { - struct perf_evsel *evsel, *events = read_event_desc(ff->ph, ff->fd); + struct perf_evsel *evsel, *events = read_event_desc(ff); u32 j; u64 *id; @@ -1597,7 +1596,7 @@ static int perf_header__read_build_ids(struct perf_header *header, #define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \ static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \ {\ - ff->ph->env.__feat_env = do_read_string(ff->fd, ff->ph); \ + ff->ph->env.__feat_env = do_read_string(ff); \ return ff->ph->env.__feat_env ? 0 : -ENOMEM; \ } @@ -1627,11 +1626,11 @@ static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused) int ret; u32 nr_cpus_avail, nr_cpus_online; - ret = do_read_u32(ff->fd, ff->ph, &nr_cpus_avail); + ret = do_read_u32(ff, &nr_cpus_avail); if (ret) return ret; - ret = do_read_u32(ff->fd, ff->ph, &nr_cpus_online); + ret = do_read_u32(ff, &nr_cpus_online); if (ret) return ret; ff->ph->env.nr_cpus_avail = (int)nr_cpus_avail; @@ -1644,7 +1643,7 @@ static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused) u64 total_mem; int ret; - ret = do_read_u64(ff->fd, ff->ph, &total_mem); + ret = do_read_u64(ff, &total_mem); if (ret) return -1; ff->ph->env.total_mem = (unsigned long long)total_mem; @@ -1687,7 +1686,7 @@ static int process_event_desc(struct feat_fd *ff, void *data __maybe_unused) { struct perf_session *session; - struct perf_evsel *evsel, *events = read_event_desc(ff->ph, ff->fd); + struct perf_evsel *evsel, *events = read_event_desc(ff); if (!events) return 0; @@ -1706,7 +1705,7 @@ static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused) char *str, *cmdline = NULL, **argv = NULL; u32 nr, i, len = 0; - if (do_read_u32(ff->fd, ff->ph, &nr)) + if (do_read_u32(ff, &nr)) return -1; ff->ph->env.nr_cmdline = nr; @@ -1720,7 +1719,7 @@ static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused) goto error; for (i = 0; i < nr; i++) { - str = do_read_string(ff->fd, ff->ph); + str = do_read_string(ff); if (!str) goto error; @@ -1752,7 +1751,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (!ph->env.cpu) return -1; - if (do_read_u32(ff->fd, ph, &nr)) + if (do_read_u32(ff, &nr)) goto free_cpu; ph->env.nr_sibling_cores = nr; @@ -1761,7 +1760,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) goto free_cpu; for (i = 0; i < nr; i++) { - str = do_read_string(ff->fd, ph); + str = do_read_string(ff); if (!str) goto error; @@ -1773,14 +1772,14 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) } ph->env.sibling_cores = strbuf_detach(&sb, NULL); - if (do_read_u32(ff->fd, ph, &nr)) + if (do_read_u32(ff, &nr)) return -1; ph->env.nr_sibling_threads = nr; size += sizeof(u32); for (i = 0; i < nr; i++) { - str = do_read_string(ff->fd, ph); + str = do_read_string(ff); if (!str) goto error; @@ -1802,12 +1801,12 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) } for (i = 0; i < (u32)cpu_nr; i++) { - if (do_read_u32(ff->fd, ph, &nr)) + if (do_read_u32(ff, &nr)) goto free_cpu; ph->env.cpu[i].core_id = nr; - if (do_read_u32(ff->fd, ph, &nr)) + if (do_read_u32(ff, &nr)) goto free_cpu; if (nr != (u32)-1 && nr > (u32)cpu_nr) { @@ -1835,7 +1834,7 @@ static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused) char *str; /* nr nodes */ - if (do_read_u32(ff->fd, ff->ph, &nr)) + if (do_read_u32(ff, &nr)) return -1; nodes = zalloc(sizeof(*nodes) * nr); @@ -1846,16 +1845,16 @@ static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused) n = &nodes[i]; /* node number */ - if (do_read_u32(ff->fd, ff->ph, &n->node)) + if (do_read_u32(ff, &n->node)) goto error; - if (do_read_u64(ff->fd, ff->ph, &n->mem_total)) + if (do_read_u64(ff, &n->mem_total)) goto error; - if (do_read_u64(ff->fd, ff->ph, &n->mem_free)) + if (do_read_u64(ff, &n->mem_free)) goto error; - str = do_read_string(ff->fd, ff->ph); + str = do_read_string(ff); if (!str) goto error; @@ -1881,7 +1880,7 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) u32 type; struct strbuf sb; - if (do_read_u32(ff->fd, ff->ph, &pmu_num)) + if (do_read_u32(ff, &pmu_num)) return -1; if (!pmu_num) { @@ -1894,10 +1893,10 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) return -1; while (pmu_num) { - if (do_read_u32(ff->fd, ff->ph, &type)) + if (do_read_u32(ff, &type)) goto error; - name = do_read_string(ff->fd, ff->ph); + name = do_read_string(ff); if (!name) goto error; @@ -1933,7 +1932,7 @@ static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) u32 nr_members; } *desc; - if (do_read_u32(ff->fd, ff->ph, &nr_groups)) + if (do_read_u32(ff, &nr_groups)) return -1; ff->ph->env.nr_groups = nr_groups; @@ -1947,14 +1946,14 @@ static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) return -1; for (i = 0; i < nr_groups; i++) { - desc[i].name = do_read_string(ff->fd, ff->ph); + desc[i].name = do_read_string(ff); if (!desc[i].name) goto out_free; - if (do_read_u32(ff->fd, ff->ph, &desc[i].leader_idx)) + if (do_read_u32(ff, &desc[i].leader_idx)) goto out_free; - if (do_read_u32(ff->fd, ff->ph, &desc[i].nr_members)) + if (do_read_u32(ff, &desc[i].nr_members)) goto out_free; } @@ -2024,13 +2023,13 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused) struct cpu_cache_level *caches; u32 cnt, i, version; - if (do_read_u32(ff->fd, ff->ph, &version)) + if (do_read_u32(ff, &version)) return -1; if (version != 1) return -1; - if (do_read_u32(ff->fd, ff->ph, &cnt)) + if (do_read_u32(ff, &cnt)) return -1; caches = zalloc(sizeof(*caches) * cnt); @@ -2041,7 +2040,7 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused) struct cpu_cache_level c; #define _R(v) \ - if (do_read_u32(ff->fd, ff->ph, &c.v))\ + if (do_read_u32(ff, &c.v))\ goto out_free_caches; \ _R(level) @@ -2051,7 +2050,7 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused) #undef _R #define _R(v) \ - c.v = do_read_string(ff->fd, ff->ph); \ + c.v = do_read_string(ff); \ if (!c.v) \ goto out_free_caches; From a02c395cccc95e40b4c506c78857e24fdb049096 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:44 -0700 Subject: [PATCH 074/253] perf header: Make write_pmu_mappings pipe-mode friendly In pipe-mode, we will operate over a buffer instead of a file descriptor but write_pmu_mappings uses lseek to move over the perf.data file. Refactor write_pmu_mappings to avoid the usage of lseek and allow reusing the same logic in pipe-mode (next patch). Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-12-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 14db9f204b0a..d5359e3384e3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -797,11 +797,19 @@ static int write_pmu_mappings(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { struct perf_pmu *pmu = NULL; - off_t offset = lseek(ff->fd, 0, SEEK_CUR); - __u32 pmu_num = 0; + u32 pmu_num = 0; int ret; - /* write real pmu_num later */ + /* + * Do a first pass to count number of pmu to avoid lseek so this + * works in pipe mode as well. + */ + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name) + continue; + pmu_num++; + } + ret = do_write(ff, &pmu_num, sizeof(pmu_num)); if (ret < 0) return ret; @@ -809,7 +817,6 @@ static int write_pmu_mappings(struct feat_fd *ff, while ((pmu = perf_pmu__scan(pmu))) { if (!pmu->name) continue; - pmu_num++; ret = do_write(ff, &pmu->type, sizeof(pmu->type)); if (ret < 0) @@ -820,12 +827,6 @@ static int write_pmu_mappings(struct feat_fd *ff, return ret; } - if (pwrite(ff->fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) { - /* discard all */ - lseek(ff->fd, offset, SEEK_SET); - return -1; - } - return 0; } From 0b3d34106c18e5d9ebba004f52a2ce8b264c493e Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:45 -0700 Subject: [PATCH 075/253] perf header: Add a buffer to struct feat_fd Extend struct feat_fd to use a temporal buffer in pipe-mode, instead of perf.data's file descriptor. The header features build_id and aux_trace already have logic to print in file-mode that heavily rely on lseek the file. For now, leave such features inactive in pipe-mode and print a warning if their functions are called in pipe-mode. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-13-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 81 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d5359e3384e3..5e6d4d29a7a3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -62,6 +62,7 @@ struct perf_file_attr { struct feat_fd { struct perf_header *ph; int fd; + void *buf; /* Either buf != NULL or fd >= 0 */ ssize_t offset; size_t size; }; @@ -81,16 +82,49 @@ bool perf_header__has_feat(const struct perf_header *header, int feat) return test_bit(feat, header->adds_features); } +static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size) +{ + ssize_t ret = writen(ff->fd, buf, size); + + if (ret != (ssize_t)size) + return ret < 0 ? (int)ret : -1; + return 0; +} + +static int __do_write_buf(struct feat_fd *ff, const void *buf, size_t size) +{ + /* struct perf_event_header::size is u16 */ + const size_t max_size = 0xffff - sizeof(struct perf_event_header); + size_t new_size = ff->size; + void *addr; + + if (size + ff->offset > max_size) + return -E2BIG; + + while (size > (new_size - ff->offset)) + new_size <<= 1; + new_size = min(max_size, new_size); + + if (ff->size < new_size) { + addr = realloc(ff->buf, new_size); + if (!addr) + return -ENOMEM; + ff->buf = addr; + ff->size = new_size; + } + + memcpy(ff->buf + ff->offset, buf, size); + ff->offset += size; + + return 0; +} + /* Return: 0 if succeded, -ERR if failed. */ int do_write(struct feat_fd *ff, const void *buf, size_t size) { - ssize_t ret; - - ret = writen(ff->fd, buf, size); - if (ret != (ssize_t)size) - return ret < 0 ? (int)ret : -1; - - return 0; + if (!ff->buf) + return __do_write_fd(ff, buf, size); + return __do_write_buf(ff, buf, size); } /* Return: 0 if succeded, -ERR if failed. */ @@ -126,7 +160,7 @@ static int do_write_string(struct feat_fd *ff, const char *str) return write_padded(ff, str, olen, len); } -static int __do_read(struct feat_fd *ff, void *addr, ssize_t size) +static int __do_read_fd(struct feat_fd *ff, void *addr, ssize_t size) { ssize_t ret = readn(ff->fd, addr, size); @@ -135,6 +169,25 @@ static int __do_read(struct feat_fd *ff, void *addr, ssize_t size) return 0; } +static int __do_read_buf(struct feat_fd *ff, void *addr, ssize_t size) +{ + if (size > (ssize_t)ff->size - ff->offset) + return -1; + + memcpy(addr, ff->buf + ff->offset, size); + ff->offset += size; + + return 0; + +} + +static int __do_read(struct feat_fd *ff, void *addr, ssize_t size) +{ + if (!ff->buf) + return __do_read_fd(ff, addr, size); + return __do_read_buf(ff, addr, size); +} + static int do_read_u32(struct feat_fd *ff, u32 *addr) { int ret; @@ -189,6 +242,9 @@ static char *do_read_string(struct feat_fd *ff) static int write_tracing_data(struct feat_fd *ff, struct perf_evlist *evlist) { + if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__)) + return -1; + return read_tracing_data(ff->fd, &evlist->entries); } @@ -203,6 +259,9 @@ static int write_build_id(struct feat_fd *ff, if (!perf_session__read_build_ids(session, true)) return -1; + if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__)) + return -1; + err = perf_session__write_buildid_table(session, ff); if (err < 0) { pr_debug("failed to write buildid table\n"); @@ -912,6 +971,9 @@ static int write_auxtrace(struct feat_fd *ff, struct perf_session *session; int err; + if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__)) + return -1; + session = container_of(ff->ph, struct perf_session, header); err = auxtrace_index__write(ff->fd, &session->auxtrace_index); @@ -2197,6 +2259,9 @@ static int do_write_feat(struct feat_fd *ff, int type, if (!feat_ops[type].write) return -1; + if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__)) + return -1; + (*p)->offset = lseek(ff->fd, 0, SEEK_CUR); err = feat_ops[type].write(ff, evlist); From a4d8c9855a260359655f2a302ee2b231cad379ca Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:46 -0700 Subject: [PATCH 076/253] perf header: Change FEAT_OP* macros There are three FEAT_OP* macros: - FEAT_OPA: for features without process record. - FEAT_OPP: for features with process record. - FEAT_OPF: like FEAT_OPP but to show only if show_full_info flags is set. To add pipe-mode headers we need yet another variation of the macros (one to specify whether a feature generates an auxiliar record). Instead, we redefine macros so that: - show_full_info is specified as an argument (to remove the FEAT_OPF variation) and, - it always sets "process" handler (to remove the FEAT_OPA variation). Individual process handlers can be NULLed individually. This allows to define two variations only: - FEAT_OPR: synthesizes auxiliar event record. - FEAT_OPN: doesn't synthesize an auxiliar event record. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-14-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 72 ++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 5e6d4d29a7a3..0fdbf7554ea3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -2139,42 +2140,57 @@ struct feature_ops { int (*process)(struct feat_fd *ff, void *data); const char *name; bool full_only; + bool synthesize; }; -#define FEAT_OPA(n, func) \ - [n] = { .name = #n, .write = write_##func, .print = print_##func } -#define FEAT_OPP(n, func) \ - [n] = { .name = #n, .write = write_##func, .print = print_##func, \ - .process = process_##func } -#define FEAT_OPF(n, func) \ - [n] = { .name = #n, .write = write_##func, .print = print_##func, \ - .process = process_##func, .full_only = true } +#define FEAT_OPR(n, func, __full_only) \ + [HEADER_##n] = { \ + .name = __stringify(n), \ + .write = write_##func, \ + .print = print_##func, \ + .full_only = __full_only, \ + .process = process_##func, \ + .synthesize = true \ + } + +#define FEAT_OPN(n, func, __full_only) \ + [HEADER_##n] = { \ + .name = __stringify(n), \ + .write = write_##func, \ + .print = print_##func, \ + .full_only = __full_only, \ + .process = process_##func \ + } /* feature_ops not implemented: */ #define print_tracing_data NULL #define print_build_id NULL +#define process_branch_stack NULL +#define process_stat NULL + + static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { - FEAT_OPP(HEADER_TRACING_DATA, tracing_data), - FEAT_OPP(HEADER_BUILD_ID, build_id), - FEAT_OPP(HEADER_HOSTNAME, hostname), - FEAT_OPP(HEADER_OSRELEASE, osrelease), - FEAT_OPP(HEADER_VERSION, version), - FEAT_OPP(HEADER_ARCH, arch), - FEAT_OPP(HEADER_NRCPUS, nrcpus), - FEAT_OPP(HEADER_CPUDESC, cpudesc), - FEAT_OPP(HEADER_CPUID, cpuid), - FEAT_OPP(HEADER_TOTAL_MEM, total_mem), - FEAT_OPP(HEADER_EVENT_DESC, event_desc), - FEAT_OPP(HEADER_CMDLINE, cmdline), - FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), - FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), - FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), - FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings), - FEAT_OPP(HEADER_GROUP_DESC, group_desc), - FEAT_OPP(HEADER_AUXTRACE, auxtrace), - FEAT_OPA(HEADER_STAT, stat), - FEAT_OPF(HEADER_CACHE, cache), + FEAT_OPN(TRACING_DATA, tracing_data, false), + FEAT_OPN(BUILD_ID, build_id, false), + FEAT_OPR(HOSTNAME, hostname, false), + FEAT_OPR(OSRELEASE, osrelease, false), + FEAT_OPR(VERSION, version, false), + FEAT_OPR(ARCH, arch, false), + FEAT_OPR(NRCPUS, nrcpus, false), + FEAT_OPR(CPUDESC, cpudesc, false), + FEAT_OPR(CPUID, cpuid, false), + FEAT_OPR(TOTAL_MEM, total_mem, false), + FEAT_OPR(EVENT_DESC, event_desc, false), + FEAT_OPR(CMDLINE, cmdline, false), + FEAT_OPR(CPU_TOPOLOGY, cpu_topology, true), + FEAT_OPR(NUMA_TOPOLOGY, numa_topology, true), + FEAT_OPN(BRANCH_STACK, branch_stack, false), + FEAT_OPR(PMU_MAPPINGS, pmu_mappings, false), + FEAT_OPN(GROUP_DESC, group_desc, false), + FEAT_OPN(AUXTRACE, auxtrace, false), + FEAT_OPN(STAT, stat, false), + FEAT_OPN(CACHE, cache, true), }; struct header_print_data { From 114f709e01e62760a6d03de1358188293dfefdda Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:47 -0700 Subject: [PATCH 077/253] perf tool: Add show_feature_header to perf_tool Add show_feat_hdr to control level of printed information of feature headers. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-15-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 ++++ tools/perf/builtin-script.c | 3 +++ tools/perf/util/tool.h | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 79a33eb1a10d..40c3a92c8006 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -988,6 +988,10 @@ int cmd_report(int argc, const char **argv) /* Force tty output for header output and per-thread stat. */ if (report.header || report.header_only || report.show_threads) use_browser = 0; + if (report.header || report.header_only) + report.tool.show_feat_hdr = SHOW_FEAT_HEADER; + if (report.show_full_info) + report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO; if (strcmp(input_name, "-") != 0) setup_browser(true); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 83cdc0a61fd6..6e44552a0551 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2972,10 +2972,13 @@ int cmd_script(int argc, const char **argv) return -1; if (header || header_only) { + script.tool.show_feat_hdr = SHOW_FEAT_HEADER; perf_session__fprintf_info(session, stdout, show_full_info); if (header_only) goto out_delete; } + if (show_full_info) + script.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO; if (symbol__init(&session->header.env) < 0) goto out_delete; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 829471a1c6d7..baeca808dfda 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -34,6 +34,12 @@ typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event, struct perf_session *session); +enum show_feature_header { + SHOW_FEAT_NO_HEADER = 0, + SHOW_FEAT_HEADER, + SHOW_FEAT_HEADER_FULL_INFO, +}; + struct perf_tool { event_sample sample, read; @@ -68,6 +74,7 @@ struct perf_tool { bool ordered_events; bool ordering_requires_timestamps; bool namespace_events; + enum show_feature_header show_feat_hdr; }; #endif /* __PERF_TOOL_H */ From e9def1b2e74e3d2134133f70d2a84c242446bbe7 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:48 -0700 Subject: [PATCH 078/253] perf tools: Add feature header record to pipe-mode Add header record types to pipe-mode, reusing the functions used in file-mode and leveraging the new struct feat_fd. For alignment, check that synthesized events don't exceed pagesize. Add the perf_event__synthesize_feature event call back to process the new header records. Before this patch: $ perf record -o - -e cycles sleep 1 | perf report --stdio --header [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] ... After this patch: $ perf record -o - -e cycles sleep 1 | perf report --stdio --header # ======== # captured on: Mon May 22 16:33:43 2017 # ======== # # hostname : my_hostname # os release : 4.11.0-dbx-up_perf # perf version : 4.11.rc6.g6277c80 # arch : x86_64 # nrcpus online : 72 # nrcpus avail : 72 # cpudesc : Intel(R) Xeon(R) CPU E5-2696 v3 @ 2.30GHz # cpuid : GenuineIntel,6,63,2 # total memory : 263457192 kB # cmdline : /root/perf record -o - -e cycles -c 100000 sleep 1 # HEADER_CPU_TOPOLOGY info available, use -I to display # HEADER_NUMA_TOPOLOGY info available, use -I to display # pmu mappings: intel_bts = 6, uncore_imc_4 = 22, uncore_sbox_1 = 47, uncore_cbox_5 = 33, uncore_ha_0 = 16, uncore_cbox [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] ... Support added for the subcommands: report, inject, annotate and script. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-16-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- .../Documentation/perf.data-file-format.txt | 10 +- tools/perf/builtin-annotate.c | 1 + tools/perf/builtin-inject.c | 1 + tools/perf/builtin-record.c | 7 ++ tools/perf/builtin-report.c | 1 + tools/perf/builtin-script.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/event.h | 8 ++ tools/perf/util/header.c | 98 +++++++++++++++++++ tools/perf/util/header.h | 9 ++ tools/perf/util/session.c | 4 + tools/perf/util/tool.h | 3 +- 12 files changed, 141 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index de8b39dda7b8..e90c59c6d815 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -398,6 +398,11 @@ struct auxtrace_error_event { char msg[MAX_AUXTRACE_ERROR_MSG]; }; + PERF_RECORD_HEADER_FEATURE = 80, + +Describes a header feature. These are records used in pipe-mode that +contain information that otherwise would be in perf.data file's header. + Event types Define the event attributes with their IDs. @@ -422,8 +427,9 @@ struct perf_pipe_file_header { }; The information about attrs, data, and event_types is instead in the -synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA and -PERF_RECORD_HEADER_EVENT_TYPE that are generated by perf record in pipe-mode. +synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA, +PERF_RECORD_HEADER_EVENT_TYPE, and PERF_RECORD_HEADER_FEATURE +that are generated by perf record in pipe-mode. References: diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7a5dc7e5c577..5205408e795b 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -397,6 +397,7 @@ int cmd_annotate(int argc, const char **argv) .namespaces = perf_event__process_namespaces, .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, + .feature = perf_event__process_feature, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index ea8db38eedd1..2b8032908fb2 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -770,6 +770,7 @@ int cmd_inject(int argc, const char **argv) .finished_round = perf_event__repipe_oe_synth, .build_id = perf_event__repipe_op2_synth, .id_index = perf_event__repipe_op2_synth, + .feature = perf_event__repipe_op2_synth, }, .input_name = "-", .samples = LIST_HEAD_INIT(inject.samples), diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 64eef9a567d9..36d7117a7562 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -799,6 +799,13 @@ static int record__synthesize(struct record *rec, bool tail) return 0; if (file->is_pipe) { + err = perf_event__synthesize_features( + tool, session, rec->evlist, process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize features.\n"); + return err; + } + err = perf_event__synthesize_attrs(tool, session, process_synthesized_event); if (err < 0) { diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 40c3a92c8006..8e752ba5e887 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -718,6 +718,7 @@ int cmd_report(int argc, const char **argv) .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, + .feature = perf_event__process_feature, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6e44552a0551..d430ff42208a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2682,6 +2682,7 @@ int cmd_script(int argc, const char **argv) .attr = process_attr, .event_update = perf_event__process_event_update, .tracing_data = perf_event__process_tracing_data, + .feature = perf_event__process_feature, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index dc5c3bb69d73..1c905ba3641b 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -57,6 +57,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_STAT_ROUND] = "STAT_ROUND", [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE", [PERF_RECORD_TIME_CONV] = "TIME_CONV", + [PERF_RECORD_HEADER_FEATURE] = "FEATURE", }; static const char *perf_ns__names[] = { diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 9967c87af7a6..37c5fafb549c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -244,6 +244,7 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_STAT_ROUND = 77, PERF_RECORD_EVENT_UPDATE = 78, PERF_RECORD_TIME_CONV = 79, + PERF_RECORD_HEADER_FEATURE = 80, PERF_RECORD_HEADER_MAX }; @@ -609,6 +610,12 @@ struct time_conv_event { u64 time_zero; }; +struct feature_event { + struct perf_event_header header; + u64 feat_id; + char data[]; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -639,6 +646,7 @@ union perf_event { struct stat_event stat; struct stat_round_event stat_round; struct time_conv_event time_conv; + struct feature_event feat; }; void perf_event__print_totals(void); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 0fdbf7554ea3..2e6036d3e584 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -35,6 +35,7 @@ #include "data.h" #include #include "asm/bug.h" +#include "tool.h" #include "sane_ctype.h" @@ -2982,6 +2983,103 @@ int perf_event__synthesize_attr(struct perf_tool *tool, return err; } +int perf_event__synthesize_features(struct perf_tool *tool, + struct perf_session *session, + struct perf_evlist *evlist, + perf_event__handler_t process) +{ + struct perf_header *header = &session->header; + struct feat_fd ff; + struct feature_event *fe; + size_t sz, sz_hdr; + int feat, ret; + + sz_hdr = sizeof(fe->header); + sz = sizeof(union perf_event); + /* get a nice alignment */ + sz = PERF_ALIGN(sz, page_size); + + memset(&ff, 0, sizeof(ff)); + + ff.buf = malloc(sz); + if (!ff.buf) + return -ENOMEM; + + ff.size = sz - sz_hdr; + + for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { + if (!feat_ops[feat].synthesize) { + pr_debug("No record header feature for header :%d\n", feat); + continue; + } + + ff.offset = sizeof(*fe); + + ret = feat_ops[feat].write(&ff, evlist); + if (ret || ff.offset <= (ssize_t)sizeof(*fe)) { + pr_debug("Error writing feature\n"); + continue; + } + /* ff.buf may have changed due to realloc in do_write() */ + fe = ff.buf; + memset(fe, 0, sizeof(*fe)); + + fe->feat_id = feat; + fe->header.type = PERF_RECORD_HEADER_FEATURE; + fe->header.size = ff.offset; + + ret = process(tool, ff.buf, NULL, NULL); + if (ret) { + free(ff.buf); + return ret; + } + } + free(ff.buf); + return 0; +} + +int perf_event__process_feature(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session __maybe_unused) +{ + struct feat_fd ff = { .fd = 0 }; + struct feature_event *fe = (struct feature_event *)event; + int type = fe->header.type; + u64 feat = fe->feat_id; + + if (type < 0 || type >= PERF_RECORD_HEADER_MAX) { + pr_warning("invalid record type %d in pipe-mode\n", type); + return 0; + } + if (feat == HEADER_RESERVED || feat > HEADER_LAST_FEATURE) { + pr_warning("invalid record type %d in pipe-mode\n", type); + return -1; + } + + if (!feat_ops[feat].process) + return 0; + + ff.buf = (void *)fe->data; + ff.size = event->header.size - sizeof(event->header); + ff.ph = &session->header; + + if (feat_ops[feat].process(&ff, NULL)) + return -1; + + if (!feat_ops[feat].print || !tool->show_feat_hdr) + return 0; + + if (!feat_ops[feat].full_only || + tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) { + feat_ops[feat].print(&ff, stdout); + } else { + fprintf(stdout, "# %s info available, use -I to display\n", + feat_ops[feat].name); + } + + return 0; +} + static struct event_update_event * event_update_event__new(size_t size, u64 type, u64 id) { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 9d8dcd5eb727..f7a16ee527b8 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -101,6 +101,15 @@ int perf_header__process_sections(struct perf_header *header, int fd, int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); +int perf_event__synthesize_features(struct perf_tool *tool, + struct perf_session *session, + struct perf_evlist *evlist, + perf_event__handler_t process); + +int perf_event__process_feature(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session); + int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d19c40a81040..dc453f84a14c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -428,6 +428,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->stat_round = process_stat_round_stub; if (tool->time_conv == NULL) tool->time_conv = process_event_op2_stub; + if (tool->feature == NULL) + tool->feature = process_event_op2_stub; } static void swap_sample_id_all(union perf_event *event, void *data) @@ -1371,6 +1373,8 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_TIME_CONV: session->time_conv = event->time_conv; return tool->time_conv(tool, event, session); + case PERF_RECORD_HEADER_FEATURE: + return tool->feature(tool, event, session); default: return -EINVAL; } diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index baeca808dfda..d549e50db397 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -69,7 +69,8 @@ struct perf_tool { cpu_map, stat_config, stat, - stat_round; + stat_round, + feature; event_op3 auxtrace; bool ordered_events; bool ordering_requires_timestamps; From f9ebdccf2b78e643d1ba2c979fa293c9d1e8ba86 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Mon, 17 Jul 2017 21:25:49 -0700 Subject: [PATCH 079/253] perf header: Add event desc to pipe-mode header Add event descriptor to perf header output in pipe-mode. After this patch: $ perf record -e cycles sleep 1 | perf report --header # ======== # captured on: Mon Jun 5 22:52:13 2017 # ======== # # hostname : lphh20 # os release : 4.3.5-smp-801.43.0.0 # perf version : 4.12.rc2.g439987 # arch : x86_64 # nrcpus online : 72 # nrcpus avail : 72 # cpudesc : Intel(R) Xeon(R) CPU E5-2696 v3 @ 2.30GHz # cpuid : GenuineIntel,6,63,2 # total memory : 264134144 kB # cmdline : /root/perf record -e cycles sleep 1 # event : name = cycles, , size = 112, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|PERIOD, disabled = 1, inherit = 1, mmap = 1, comm = 1, freq = 1, enable_on_exec = 1, task = 1, sample_id_all = 1, exclude_guest = 1, mmap2 = 1, comm_exec = 1 # CPU_TOPOLOGY info available, use -I to display # NUMA_TOPOLOGY info available, use -I to display # pmu mappings: intel_bts = 6, cpu = 4, msr = 49, uncore_cbox_10 = 36, uncore_cbox_11 = 37, uncore_cbox_12 = 38, uncore_cbox_13 = 39, uncore_cbox_14 = 40, uncore_cbox_15 = 41, uncore_cbox_16 = 42, uncore_cbox_17 = 43, software = 1, power = 7, uncore_irp = 24, uncore_pcu = 48, tracepoint = 2, uncore_imc_0 = 16, uncore_imc_1 = 17, uncore_imc_2 = 18, uncore_imc_3 = 19, uncore_imc_4 = 20, uncore_imc_5 = 21, uncore_imc_6 = 22, uncore_imc_7 = 23, uncore_qpi_0 = 8, uncore_qpi_1 = 9, uncore_cbox_0 = 26, uncore_cbox_1 = 27, uncore_cbox_2 = 28, uncore_cbox_3 = 29, uncore_cbox_4 = 30, uncore_cbox_5 = 31, uncore_cbox_6 = 32, uncore_cbox_7 = 33, uncore_cbox_8 = 34, uncore_cbox_9 = 35, uncore_r2pcie = 13, uncore_r3qpi_0 = 10, uncore_r3qpi_1 = 11, uncore_r3qpi_2 = 12, uncore_sbox_0 = 44, uncore_sbox_1 = 45, uncore_sbox_2 = 46, uncore_sbox_3 = 47, breakpoint = 5, uncore_ha_0 = 14, uncore_ha_1 = 15, uncore_ubox = 25 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB (null) ] Prior to this patch, event was not printed. Signed-off-by: David Carrillo-Cisneros Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: He Kuang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20170718042549.145161-17-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2e6036d3e584..28bf4442d577 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -67,6 +67,7 @@ struct feat_fd { void *buf; /* Either buf != NULL or fd >= 0 */ ssize_t offset; size_t size; + struct perf_evsel *events; }; void perf_header__set_feat(struct perf_header *header, int feat) @@ -1359,10 +1360,15 @@ static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val, static void print_event_desc(struct feat_fd *ff, FILE *fp) { - struct perf_evsel *evsel, *events = read_event_desc(ff); + struct perf_evsel *evsel, *events; u32 j; u64 *id; + if (ff->events) + events = ff->events; + else + events = read_event_desc(ff); + if (!events) { fprintf(fp, "# event desc: not available or unable to read\n"); return; @@ -1387,6 +1393,7 @@ static void print_event_desc(struct feat_fd *ff, FILE *fp) } free_event_desc(events); + ff->events = NULL; } static void print_total_mem(struct feat_fd *ff, FILE *fp) @@ -1757,10 +1764,18 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused) return 0; session = container_of(ff->ph, struct perf_session, header); + + if (session->file->is_pipe) { + /* Save events for reading later by print_event_desc, + * since they can't be read again in pipe mode. */ + ff->events = events; + } + for (evsel = events; evsel->attr.size; evsel++) perf_evlist__set_event_name(session->evlist, evsel); - free_event_desc(events); + if (!session->file->is_pipe) + free_event_desc(events); return 0; } From eb0baf8a0d9259d168523b8e7c436b55ade7c546 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 18 Jul 2017 20:13:09 +0800 Subject: [PATCH 080/253] perf/core: Define the common branch type classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is often useful to know the branch types while analyzing branch data. For example, a call is very different from a conditional branch. Currently we have to look it up in binary while the binary may later not be available and even the binary is available but user has to take some time. It is very useful for user to check it directly in perf report. Perf already has support for disassembling the branch instruction to get the x86 branch type. To keep consistent on kernel and userspace and make the classification more common, the patch adds the common branch type classification in perf_event.h. The patch only defines a minimum but most common set of branch types. PERF_BR_UNKNOWN : unknown PERF_BR_COND :conditional PERF_BR_UNCOND : unconditional PERF_BR_IND : indirect PERF_BR_CALL : function call PERF_BR_IND_CALL : indirect function call PERF_BR_RET : function return PERF_BR_SYSCALL : syscall PERF_BR_SYSRET : syscall return PERF_BR_COND_CALL : conditional function call PERF_BR_COND_RET : conditional function return The patch also adds a new field type (4 bits) in perf_branch_entry to record the branch type. Since the disassembling of branch instruction needs some overhead, a new PERF_SAMPLE_BRANCH_TYPE_SAVE is introduced to indicate if it needs to disassemble the branch instruction and record the branch type. Change log: v10: Not changed. v9: Not changed. v8: Change PERF_BR_NONE to PERF_BR_UNKNOWN. No other change. v7: Just keep the most common branch types. Others are removed. v6: Not changed. v5: Not changed. The v5 patch series just change the userspace. v4: Comparing to previous version, the major changes are: 1. Remove the PERF_BR_JCC_FWD/PERF_BR_JCC_BWD, they will be computed later in userspace. 2. Remove the "cross" field in perf_branch_entry. The cross page computing will be done later in userspace. Signed-off-by: Yao Jin Acked-by: Jiri Olsa Acked-by: Michael Ellerman Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Link: http://lkml.kernel.org/r/1500379995-6449-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 27 ++++++++++++++++++++++++++- tools/include/uapi/linux/perf_event.h | 27 ++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b1c0b187acfe..642db5fa3286 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */ PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */ + PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -198,9 +200,30 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = + 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; +/* + * Common flow change classification + */ +enum { + PERF_BR_UNKNOWN = 0, /* unknown */ + PERF_BR_COND = 1, /* conditional */ + PERF_BR_UNCOND = 2, /* unconditional */ + PERF_BR_IND = 3, /* indirect */ + PERF_BR_CALL = 4, /* function call */ + PERF_BR_IND_CALL = 5, /* indirect function call */ + PERF_BR_RET = 6, /* function return */ + PERF_BR_SYSCALL = 7, /* syscall */ + PERF_BR_SYSRET = 8, /* syscall return */ + PERF_BR_COND_CALL = 9, /* conditional function call */ + PERF_BR_COND_RET = 10, /* conditional function return */ + PERF_BR_MAX, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1015,6 +1038,7 @@ union perf_mem_data_src { * in_tx: running in a hardware transaction * abort: aborting a hardware transaction * cycles: cycles from last branch (or 0 if not supported) + * type: branch type */ struct perf_branch_entry { __u64 from; @@ -1024,7 +1048,8 @@ struct perf_branch_entry { in_tx:1, /* in transaction */ abort:1, /* transaction abort */ cycles:16, /* cycle count to last branch */ - reserved:44; + type:4, /* branch type */ + reserved:40; }; #endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b1c0b187acfe..642db5fa3286 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */ PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */ + PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -198,9 +200,30 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = + 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; +/* + * Common flow change classification + */ +enum { + PERF_BR_UNKNOWN = 0, /* unknown */ + PERF_BR_COND = 1, /* conditional */ + PERF_BR_UNCOND = 2, /* unconditional */ + PERF_BR_IND = 3, /* indirect */ + PERF_BR_CALL = 4, /* function call */ + PERF_BR_IND_CALL = 5, /* indirect function call */ + PERF_BR_RET = 6, /* function return */ + PERF_BR_SYSCALL = 7, /* syscall */ + PERF_BR_SYSRET = 8, /* syscall return */ + PERF_BR_COND_CALL = 9, /* conditional function call */ + PERF_BR_COND_RET = 10, /* conditional function return */ + PERF_BR_MAX, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1015,6 +1038,7 @@ union perf_mem_data_src { * in_tx: running in a hardware transaction * abort: aborting a hardware transaction * cycles: cycles from last branch (or 0 if not supported) + * type: branch type */ struct perf_branch_entry { __u64 from; @@ -1024,7 +1048,8 @@ struct perf_branch_entry { in_tx:1, /* in transaction */ abort:1, /* transaction abort */ cycles:16, /* cycle count to last branch */ - reserved:44; + type:4, /* branch type */ + reserved:40; }; #endif /* _UAPI_LINUX_PERF_EVENT_H */ From d5c7f9dc58edcfb6b45f557bb0023173a0dabde6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 18 Jul 2017 20:13:10 +0800 Subject: [PATCH 081/253] perf/x86/intel: Record branch type Perf already has support for disassembling the branch instruction and using the branch type for filtering. The patch just records the branch type in perf_branch_entry. Before recording, the patch converts the x86 branch type to common branch type. Change log: v10: Set the branch_map array to be static. The previous version has it on stack then makes the compiler to create it every time when the function gets called. v9: Use __ffs() to find first bit in type in common_branch_type(). It lets the code be clear. v8: Change PERF_BR_NONE to PERF_BR_UNKNOWN. v7: Just convert following x86 branch types to common branch types. X86_BR_CALL -> PERF_BR_CALL X86_BR_RET -> PERF_BR_RET X86_BR_JCC -> PERF_BR_COND X86_BR_JMP -> PERF_BR_UNCOND X86_BR_IND_CALL -> PERF_BR_IND_CALL X86_BR_ZERO_CALL -> PERF_BR_CALL X86_BR_IND_JMP -> PERF_BR_IND X86_BR_SYSCALL -> PERF_BR_SYSCALL X86_BR_SYSRET -> PERF_BR_SYSRET Others are set to PERF_BR_NONE v6: Not changed. v5: Just fix the merge error. No other update. v4: Comparing to previous version, the major changes are: 1. Uses a lookup table to convert x86 branch type to common branch type. 2. Move the JCC forward/JCC backward and cross page computing to user space. 3. Initialize branch type to 0 in intel_pmu_lbr_read_32 and intel_pmu_lbr_read_64 Signed-off-by: Yao Jin Acked-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Michael Ellerman Link: http://lkml.kernel.org/r/1500379995-6449-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/events/intel/lbr.c | 52 ++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index eb261656a320..0edda489cf36 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -109,6 +109,9 @@ enum { X86_BR_ZERO_CALL = 1 << 15,/* zero length call */ X86_BR_CALL_STACK = 1 << 16,/* call stack */ X86_BR_IND_JMP = 1 << 17,/* indirect jump */ + + X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */ + }; #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) @@ -510,6 +513,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) cpuc->lbr_entries[i].in_tx = 0; cpuc->lbr_entries[i].abort = 0; cpuc->lbr_entries[i].cycles = 0; + cpuc->lbr_entries[i].type = 0; cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; @@ -596,6 +600,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) cpuc->lbr_entries[out].in_tx = in_tx; cpuc->lbr_entries[out].abort = abort; cpuc->lbr_entries[out].cycles = cycles; + cpuc->lbr_entries[out].type = 0; cpuc->lbr_entries[out].reserved = 0; out++; } @@ -673,6 +678,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_CALL) mask |= X86_BR_CALL | X86_BR_ZERO_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE) + mask |= X86_BR_TYPE_SAVE; + /* * stash actual user request into reg, it may * be used by fixup code for some CPU @@ -926,6 +935,43 @@ static int branch_type(unsigned long from, unsigned long to, int abort) return ret; } +#define X86_BR_TYPE_MAP_MAX 16 + +static int branch_map[X86_BR_TYPE_MAP_MAX] = { + PERF_BR_CALL, /* X86_BR_CALL */ + PERF_BR_RET, /* X86_BR_RET */ + PERF_BR_SYSCALL, /* X86_BR_SYSCALL */ + PERF_BR_SYSRET, /* X86_BR_SYSRET */ + PERF_BR_UNKNOWN, /* X86_BR_INT */ + PERF_BR_UNKNOWN, /* X86_BR_IRET */ + PERF_BR_COND, /* X86_BR_JCC */ + PERF_BR_UNCOND, /* X86_BR_JMP */ + PERF_BR_UNKNOWN, /* X86_BR_IRQ */ + PERF_BR_IND_CALL, /* X86_BR_IND_CALL */ + PERF_BR_UNKNOWN, /* X86_BR_ABORT */ + PERF_BR_UNKNOWN, /* X86_BR_IN_TX */ + PERF_BR_UNKNOWN, /* X86_BR_NO_TX */ + PERF_BR_CALL, /* X86_BR_ZERO_CALL */ + PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */ + PERF_BR_IND, /* X86_BR_IND_JMP */ +}; + +static int +common_branch_type(int type) +{ + int i; + + type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */ + + if (type) { + i = __ffs(type); + if (i < X86_BR_TYPE_MAP_MAX) + return branch_map[i]; + } + + return PERF_BR_UNKNOWN; +} + /* * implement actual branch filter based on user demand. * Hardware may not exactly satisfy that request, thus @@ -942,7 +988,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) bool compress = false; /* if sampling all branches, then nothing to filter */ - if ((br_sel & X86_BR_ALL) == X86_BR_ALL) + if (((br_sel & X86_BR_ALL) == X86_BR_ALL) && + ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE)) return; for (i = 0; i < cpuc->lbr_stack.nr; i++) { @@ -963,6 +1010,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) cpuc->lbr_entries[i].from = 0; compress = true; } + + if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) + cpuc->lbr_entries[i].type = common_branch_type(type); } if (!compress) From 60f83fa6341dab4aec01cee354ea902771473adb Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 18 Jul 2017 20:13:11 +0800 Subject: [PATCH 082/253] perf record: Create a new option save_type in --branch-filter The option indicates the kernel to save branch type during sampling. One example: perf record -g --branch-filter any,save_type Signed-off-by: Yao Jin Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Michael Ellerman Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1500379995-6449-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 1 + tools/perf/util/parse-branch-options.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b0e9e921d534..9bdea047c5db 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -332,6 +332,7 @@ following filters are defined: - no_tx: only when the target is not in a hardware transaction - abort_tx: only when the target is a hardware transaction abort - cond: conditional branches + - save_type: save branch type during sampling in case binary is not available later + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index 38fd11504015..e71fb5f31e84 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -28,6 +28,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP), BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL), + BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE), BRANCH_END }; From 8d51735fcd2be1791c0bfe2d581d9063281fe7fb Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 18 Jul 2017 20:13:12 +0800 Subject: [PATCH 083/253] perf report: Refactor the branch info printing code The branch info such as predicted/cycles/... are printed at the callchain entries. For example: perf report --branch-history --no-children --stdio --1.07%--main div.c:39 (predicted:52.4% cycles:1 iterations:17) main div.c:44 (predicted:52.4% cycles:1) main div.c:42 (cycles:2) compute_flag div.c:28 (cycles:2) compute_flag div.c:27 (cycles:1) rand rand.c:28 (cycles:1) rand rand.c:28 (cycles:1) __random random.c:298 (cycles:1) __random random.c:297 (cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (cycles:1) But the current code is difficult to maintain and extend. This patch refactors the code for easy maintenance. Change log: v6: 1. Put the multiline condition code into {} brackets in counts_str_build() 2. Keep the original display order, that is: predicted, abort, cycles, iterations v5: It's a new patch in v5 patch series. Signed-off-by: Yao Jin Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Michael Ellerman Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1500379995-6449-5-git-send-email-yao.jin@linux.intel.com [ Don't use 'index' as a name for a variable, it shadows a globa decl in older distros ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 104 +++++++++++++++--------------------- 1 file changed, 43 insertions(+), 61 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index b4204b43ed58..917f4d6510ae 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1214,83 +1214,65 @@ int callchain_branch_counts(struct callchain_root *root, cycles_count); } +static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize) +{ + int printed; + + printed = scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value); + + return printed; +} + +static int count_float_printf(int idx, const char *str, float value, char *bf, int bfsize) +{ + int printed; + + printed = scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value); + + return printed; +} + static int counts_str_build(char *bf, int bfsize, u64 branch_count, u64 predicted_count, u64 abort_count, u64 cycles_count, u64 iter_count, u64 samples_count) { - double predicted_percent = 0.0; - const char *null_str = ""; - char iter_str[32]; - char cycle_str[32]; - char *istr, *cstr; u64 cycles; + int printed = 0, i = 0; if (branch_count == 0) return scnprintf(bf, bfsize, " (calltrace)"); + if (predicted_count < branch_count) { + printed += count_float_printf(i++, "predicted", + predicted_count * 100.0 / branch_count, + bf + printed, bfsize - printed); + } + + if (abort_count) { + printed += count_float_printf(i++, "abort", + abort_count * 100.0 / branch_count, + bf + printed, bfsize - printed); + } + cycles = cycles_count / branch_count; + if (cycles) { + printed += count_pri64_printf(i++, "cycles", + cycles, + bf + printed, bfsize - printed); + } if (iter_count && samples_count) { - if (cycles > 0) - scnprintf(iter_str, sizeof(iter_str), - " iterations:%" PRId64 "", - iter_count / samples_count); - else - scnprintf(iter_str, sizeof(iter_str), - "iterations:%" PRId64 "", - iter_count / samples_count); - istr = iter_str; - } else - istr = (char *)null_str; - - if (cycles > 0) { - scnprintf(cycle_str, sizeof(cycle_str), - "cycles:%" PRId64 "", cycles); - cstr = cycle_str; - } else - cstr = (char *)null_str; - - predicted_percent = predicted_count * 100.0 / branch_count; - - if ((predicted_count == branch_count) && (abort_count == 0)) { - if ((cycles > 0) || (istr != (char *)null_str)) - return scnprintf(bf, bfsize, " (%s%s)", cstr, istr); - else - return scnprintf(bf, bfsize, "%s", (char *)null_str); + printed += count_pri64_printf(i++, "iterations", + iter_count / samples_count, + bf + printed, bfsize - printed); } - if ((predicted_count < branch_count) && (abort_count == 0)) { - if ((cycles > 0) || (istr != (char *)null_str)) - return scnprintf(bf, bfsize, - " (predicted:%.1f%% %s%s)", - predicted_percent, cstr, istr); - else { - return scnprintf(bf, bfsize, - " (predicted:%.1f%%)", - predicted_percent); - } - } + if (i) + return scnprintf(bf + printed, bfsize - printed, ")"); - if ((predicted_count == branch_count) && (abort_count > 0)) { - if ((cycles > 0) || (istr != (char *)null_str)) - return scnprintf(bf, bfsize, - " (abort:%" PRId64 " %s%s)", - abort_count, cstr, istr); - else - return scnprintf(bf, bfsize, - " (abort:%" PRId64 ")", - abort_count); - } - - if ((cycles > 0) || (istr != (char *)null_str)) - return scnprintf(bf, bfsize, - " (predicted:%.1f%% abort:%" PRId64 " %s%s)", - predicted_percent, abort_count, cstr, istr); - - return scnprintf(bf, bfsize, - " (predicted:%.1f%% abort:%" PRId64 ")", - predicted_percent, abort_count); + bf[0] = 0; + return 0; } static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, From 992c7e9267c12a8e301152c5569028ff8d535322 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 18 Jul 2017 20:13:13 +0800 Subject: [PATCH 084/253] perf util: Create branch.c/.h for common branch functions Create new util/branch.c and util/branch.h to contain the common branch functions. Such as: branch_type_count(): Count the numbers of branch types branch_type_name() : Return the name of branch type branch_type_stat_display(): Display branch type statistics info branch_type_str(): Construct the branch type string. The branch type is saved in branch_flags. Change log: v8: Change PERF_BR_NONE to PERF_BR_UNKNOWN. v7: Since the common branch type name is changed (e.g. JCC->COND), this patch is performed the modification accordingly. v6: Move that multiline conditional code inside {} brackets. Move branch_type_stat_display() from builtin-report.c to branch.c. Move branch_type_str() from callchain.c to branch.c. v5: It's a new patch in v5 patch series. Signed-off-by: Yao Jin Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Michael Ellerman Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1500379995-6449-6-git-send-email-yao.jin@linux.intel.com [ Don't use 'index' and 'stat' as names for variables, it shadows global decls in older distros ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/branch.c | 147 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/branch.h | 24 +++++++ tools/perf/util/event.h | 3 +- 4 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/branch.c create mode 100644 tools/perf/util/branch.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 7580fe4d5d30..8d49a989f193 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -93,6 +93,7 @@ libperf-y += drv_configs.o libperf-y += units.o libperf-y += time-utils.o libperf-y += expr-bison.o +libperf-y += branch.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c new file mode 100644 index 000000000000..a4fce2729e50 --- /dev/null +++ b/tools/perf/util/branch.c @@ -0,0 +1,147 @@ +#include "perf.h" +#include "util/util.h" +#include "util/debug.h" +#include "util/branch.h" + +static bool cross_area(u64 addr1, u64 addr2, int size) +{ + u64 align1, align2; + + align1 = addr1 & ~(size - 1); + align2 = addr2 & ~(size - 1); + + return (align1 != align2) ? true : false; +} + +#define AREA_4K 4096 +#define AREA_2M (2 * 1024 * 1024) + +void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, + u64 from, u64 to) +{ + if (flags->type == PERF_BR_UNKNOWN || from == 0) + return; + + st->counts[flags->type]++; + + if (flags->type == PERF_BR_COND) { + if (to > from) + st->cond_fwd++; + else + st->cond_bwd++; + } + + if (cross_area(from, to, AREA_2M)) + st->cross_2m++; + else if (cross_area(from, to, AREA_4K)) + st->cross_4k++; +} + +const char *branch_type_name(int type) +{ + const char *branch_names[PERF_BR_MAX] = { + "N/A", + "COND", + "UNCOND", + "IND", + "CALL", + "IND_CALL", + "RET", + "SYSCALL", + "SYSRET", + "COND_CALL", + "COND_RET" + }; + + if (type >= 0 && type < PERF_BR_MAX) + return branch_names[type]; + + return NULL; +} + +void branch_type_stat_display(FILE *fp, struct branch_type_stat *st) +{ + u64 total = 0; + int i; + + for (i = 0; i < PERF_BR_MAX; i++) + total += st->counts[i]; + + if (total == 0) + return; + + fprintf(fp, "\n#"); + fprintf(fp, "\n# Branch Statistics:"); + fprintf(fp, "\n#"); + + if (st->cond_fwd > 0) { + fprintf(fp, "\n%8s: %5.1f%%", + "COND_FWD", + 100.0 * (double)st->cond_fwd / (double)total); + } + + if (st->cond_bwd > 0) { + fprintf(fp, "\n%8s: %5.1f%%", + "COND_BWD", + 100.0 * (double)st->cond_bwd / (double)total); + } + + if (st->cross_4k > 0) { + fprintf(fp, "\n%8s: %5.1f%%", + "CROSS_4K", + 100.0 * (double)st->cross_4k / (double)total); + } + + if (st->cross_2m > 0) { + fprintf(fp, "\n%8s: %5.1f%%", + "CROSS_2M", + 100.0 * (double)st->cross_2m / (double)total); + } + + for (i = 0; i < PERF_BR_MAX; i++) { + if (st->counts[i] > 0) + fprintf(fp, "\n%8s: %5.1f%%", + branch_type_name(i), + 100.0 * + (double)st->counts[i] / (double)total); + } +} + +static int count_str_scnprintf(int idx, const char *str, char *bf, int size) +{ + return scnprintf(bf, size, "%s%s", (idx) ? " " : " (", str); +} + +int branch_type_str(struct branch_type_stat *st, char *bf, int size) +{ + int i, j = 0, printed = 0; + u64 total = 0; + + for (i = 0; i < PERF_BR_MAX; i++) + total += st->counts[i]; + + if (total == 0) + return 0; + + if (st->cond_fwd > 0) + printed += count_str_scnprintf(j++, "COND_FWD", bf + printed, size - printed); + + if (st->cond_bwd > 0) + printed += count_str_scnprintf(j++, "COND_BWD", bf + printed, size - printed); + + for (i = 0; i < PERF_BR_MAX; i++) { + if (i == PERF_BR_COND) + continue; + + if (st->counts[i] > 0) + printed += count_str_scnprintf(j++, branch_type_name(i), bf + printed, size - printed); + } + + if (st->cross_4k > 0) + printed += count_str_scnprintf(j++, "CROSS_4K", bf + printed, size - printed); + + if (st->cross_2m > 0) + printed += count_str_scnprintf(j++, "CROSS_2M", bf + printed, size - printed); + + return printed; +} diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h new file mode 100644 index 000000000000..686f2b65ba84 --- /dev/null +++ b/tools/perf/util/branch.h @@ -0,0 +1,24 @@ +#ifndef _PERF_BRANCH_H +#define _PERF_BRANCH_H 1 + +#include +#include "../perf.h" + +struct branch_type_stat { + u64 counts[PERF_BR_MAX]; + u64 cond_fwd; + u64 cond_bwd; + u64 cross_4k; + u64 cross_2m; +}; + +struct branch_flags; + +void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, + u64 from, u64 to); + +const char *branch_type_name(int type); +void branch_type_stat_display(FILE *fp, struct branch_type_stat *st); +int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize); + +#endif /* _PERF_BRANCH_H */ diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 37c5fafb549c..423ac82605f3 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -142,7 +142,8 @@ struct branch_flags { u64 in_tx:1; u64 abort:1; u64 cycles:16; - u64 reserved:44; + u64 type:4; + u64 reserved:40; }; struct branch_entry { From 2d78b18952a1bdf125d13fa6bb68fbc5c1b0aed9 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 18 Jul 2017 20:13:14 +0800 Subject: [PATCH 085/253] perf report: Show branch type statistics for stdio mode Show the branch type statistics at the end of perf report --stdio. For example: perf report --stdio COND_FWD: 28.5% COND_BWD: 9.4% CROSS_4K: 0.7% CROSS_2M: 14.1% COND: 37.9% UNCOND: 0.2% IND: 6.7% CALL: 26.5% RET: 28.7% SYSRET: 0.0% The branch types are: COND_FWD: conditional forward COND_BWD: conditional backward COND: conditional branch UNCOND: unconditional branch IND: indirect CALL: function call IND_CALL: indirect function call RET: function return SYSCALL: syscall SYSRET: syscall return COND_CALL: conditional function call COND_RET: conditional function return CROSS_4K and CROSS_2M: They are the metrics checking for branches cross 4K or 2MB pages. It's an approximate computing. We don't know if the area is 4K or 2MB, so always compute both. To make the output simple, if a branch crosses 2M area, CROSS_4K will not be incremented. Change log v7: Since the common branch type definitions are changed, some tags/strings are updated accordingly. v6: Remove branch_type_stat_display() since it's moved to branch.c. v5: Remove the unnecessary sort__mode checking in hist_iter__branch_callback(). v4: Comparing to previous version, the major changes are: Add the computing of JCC forward/JCC backward and cross page checking by using the from and to addresses. Signed-off-by: Yao Jin Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Michael Ellerman Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1500379995-6449-7-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 25 +++++++++++++++++++++++++ tools/perf/util/hist.c | 5 +---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8e752ba5e887..cea25d03f4dd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -38,6 +38,7 @@ #include "util/time-utils.h" #include "util/auxtrace.h" #include "util/units.h" +#include "util/branch.h" #include #include @@ -73,6 +74,7 @@ struct report { u64 queue_size; int socket_filter; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); + struct branch_type_stat brtype_stat; }; static int report__config(const char *var, const char *value, void *cb) @@ -150,6 +152,22 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, return err; } +static int hist_iter__branch_callback(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused, + bool single __maybe_unused, + void *arg) +{ + struct hist_entry *he = iter->he; + struct report *rep = arg; + struct branch_info *bi; + + bi = he->branch_info; + branch_type_count(&rep->brtype_stat, &bi->flags, + bi->from.addr, bi->to.addr); + + return 0; +} + static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -188,6 +206,8 @@ static int process_sample_event(struct perf_tool *tool, */ if (!sample->branch_stack) goto out_put; + + iter.add_entry_cb = hist_iter__branch_callback; iter.ops = &hist_iter_branch; } else if (rep->mem_mode) { iter.ops = &hist_iter_mem; @@ -410,6 +430,9 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, perf_read_values_destroy(&rep->show_threads_values); } + if (sort__mode == SORT_MODE__BRANCH) + branch_type_stat_display(stdout, &rep->brtype_stat); + return 0; } @@ -944,6 +967,8 @@ int cmd_report(int argc, const char **argv) if (has_br_stack && branch_call_mode) symbol_conf.show_branchflag_count = true; + memset(&report.brtype_stat, 0, sizeof(struct branch_type_stat)); + /* * Branch mode is a tristate: * -1 means default, so decide based on the file having branch data. diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index cf0186a088c1..2f6c5e6c16f9 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -749,12 +749,9 @@ iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al } static int -iter_add_single_branch_entry(struct hist_entry_iter *iter, +iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, struct addr_location *al __maybe_unused) { - /* to avoid calling callback function */ - iter->he = NULL; - return 0; } From b851dd49868e295e18c5d72fc3bad85ff1c444b1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 18 Jul 2017 20:13:15 +0800 Subject: [PATCH 086/253] perf report: Show branch type in callchain entry Show branch type in callchain entry. The branch type is printed with other LBR information (such as cycles/abort/...). For example: perf record -g -j any,save_type perf report --branch-history --stdio --no-children 38.50% div.c:45 [.] main div | ---main div.c:42 (RET CROSS_2M cycles:2) compute_flag div.c:28 (cycles:2) compute_flag div.c:27 (RET CROSS_2M cycles:1) rand rand.c:28 (cycles:1) rand rand.c:28 (RET CROSS_2M cycles:1) __random random.c:298 (cycles:1) __random random.c:297 (COND_BWD CROSS_2M cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (COND_BWD CROSS_2M cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (RET CROSS_2M cycles:9) Change log v6: Remove the branch_type_str() since it's moved to branch.c. v5: Rewrite the branch info print code in util/callchain.c. v4: Comparing to previous version, the major changes are: Signed-off-by: Yao Jin Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Michael Ellerman Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1500379995-6449-8-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 38 ++++++++++++++++++++++++++++--------- tools/perf/util/callchain.h | 5 ++++- tools/perf/util/machine.c | 26 ++++++++++++++++--------- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 917f4d6510ae..22d413ae6025 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -23,6 +23,7 @@ #include "sort.h" #include "machine.h" #include "callchain.h" +#include "branch.h" #define CALLCHAIN_PARAM_DEFAULT \ .mode = CHAIN_GRAPH_ABS, \ @@ -571,6 +572,11 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call->cycles_count = cursor_node->branch_flags.cycles; call->iter_count = cursor_node->nr_loop_iter; call->samples_count = cursor_node->samples; + + branch_type_count(&call->brtype_stat, + &cursor_node->branch_flags, + cursor_node->branch_from, + cursor_node->ip); } list_add_tail(&call->list, &node->val); @@ -688,6 +694,11 @@ static enum match_result match_chain(struct callchain_cursor_node *node, cnode->cycles_count += node->branch_flags.cycles; cnode->iter_count += node->nr_loop_iter; cnode->samples_count += node->samples; + + branch_type_count(&cnode->brtype_stat, + &node->branch_flags, + node->branch_from, + node->ip); } return MATCH_EQ; @@ -922,7 +933,7 @@ merge_chain_branch(struct callchain_cursor *cursor, list_for_each_entry_safe(list, next_list, &src->val, list) { callchain_cursor_append(cursor, list->ip, list->ms.map, list->ms.sym, - false, NULL, 0, 0); + false, NULL, 0, 0, 0); list_del(&list->list); map__zput(list->ms.map); free(list); @@ -962,7 +973,7 @@ int callchain_merge(struct callchain_cursor *cursor, int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, struct map *map, struct symbol *sym, bool branch, struct branch_flags *flags, - int nr_loop_iter, int samples) + int nr_loop_iter, int samples, u64 branch_from) { struct callchain_cursor_node *node = *cursor->last; @@ -986,6 +997,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor, memcpy(&node->branch_flags, flags, sizeof(struct branch_flags)); + node->branch_from = branch_from; cursor->nr++; cursor->last = &node->next; @@ -1235,14 +1247,19 @@ static int count_float_printf(int idx, const char *str, float value, char *bf, i static int counts_str_build(char *bf, int bfsize, u64 branch_count, u64 predicted_count, u64 abort_count, u64 cycles_count, - u64 iter_count, u64 samples_count) + u64 iter_count, u64 samples_count, + struct branch_type_stat *brtype_stat) { u64 cycles; - int printed = 0, i = 0; + int printed, i = 0; if (branch_count == 0) return scnprintf(bf, bfsize, " (calltrace)"); + printed = branch_type_str(brtype_stat, bf, bfsize); + if (printed) + i++; + if (predicted_count < branch_count) { printed += count_float_printf(i++, "predicted", predicted_count * 100.0 / branch_count, @@ -1278,13 +1295,14 @@ static int counts_str_build(char *bf, int bfsize, static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, u64 branch_count, u64 predicted_count, u64 abort_count, u64 cycles_count, - u64 iter_count, u64 samples_count) + u64 iter_count, u64 samples_count, + struct branch_type_stat *brtype_stat) { - char str[128]; + char str[256]; counts_str_build(str, sizeof(str), branch_count, predicted_count, abort_count, cycles_count, - iter_count, samples_count); + iter_count, samples_count, brtype_stat); if (fp) return fprintf(fp, "%s", str); @@ -1316,7 +1334,8 @@ int callchain_list_counts__printf_value(struct callchain_node *node, return callchain_counts_printf(fp, bf, bfsize, branch_count, predicted_count, abort_count, - cycles_count, iter_count, samples_count); + cycles_count, iter_count, samples_count, + &clist->brtype_stat); } static void free_callchain_node(struct callchain_node *node) @@ -1441,7 +1460,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst, rc = callchain_cursor_append(dst, node->ip, node->map, node->sym, node->branch, &node->branch_flags, - node->nr_loop_iter, node->samples); + node->nr_loop_iter, node->samples, + node->branch_from); if (rc) break; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index c56c23dbbf72..97738201464a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -7,6 +7,7 @@ #include "event.h" #include "map.h" #include "symbol.h" +#include "branch.h" #define HELP_PAD "\t\t\t\t" @@ -119,6 +120,7 @@ struct callchain_list { u64 cycles_count; u64 iter_count; u64 samples_count; + struct branch_type_stat brtype_stat; char *srcline; struct list_head list; }; @@ -135,6 +137,7 @@ struct callchain_cursor_node { struct symbol *sym; bool branch; struct branch_flags branch_flags; + u64 branch_from; int nr_loop_iter; int samples; struct callchain_cursor_node *next; @@ -198,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, struct map *map, struct symbol *sym, bool branch, struct branch_flags *flags, - int nr_loop_iter, int samples); + int nr_loop_iter, int samples, u64 branch_from); /* Close a cursor writing session. Initialize for the reader */ static inline void callchain_cursor_commit(struct callchain_cursor *cursor) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index a54a2be5eda4..79d08ea694da 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1682,7 +1682,8 @@ static int add_callchain_ip(struct thread *thread, bool branch, struct branch_flags *flags, int nr_loop_iter, - int samples) + int samples, + u64 branch_from) { struct addr_location al; @@ -1735,7 +1736,8 @@ static int add_callchain_ip(struct thread *thread, if (symbol_conf.hide_unresolved && al.sym == NULL) return 0; return callchain_cursor_append(cursor, al.addr, al.map, al.sym, - branch, flags, nr_loop_iter, samples); + branch, flags, nr_loop_iter, samples, + branch_from); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, @@ -1814,7 +1816,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, struct ip_callchain *chain = sample->callchain; int chain_nr = min(max_stack, (int)chain->nr), i; u8 cpumode = PERF_RECORD_MISC_USER; - u64 ip; + u64 ip, branch_from = 0; for (i = 0; i < chain_nr; i++) { if (chain->ips[i] == PERF_CONTEXT_USER) @@ -1856,6 +1858,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread, ip = lbr_stack->entries[0].to; branch = true; flags = &lbr_stack->entries[0].flags; + branch_from = + lbr_stack->entries[0].from; } } else { if (j < lbr_nr) { @@ -1870,12 +1874,15 @@ static int resolve_lbr_callchain_sample(struct thread *thread, ip = lbr_stack->entries[0].to; branch = true; flags = &lbr_stack->entries[0].flags; + branch_from = + lbr_stack->entries[0].from; } } err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, - branch, flags, 0, 0); + branch, flags, 0, 0, + branch_from); if (err) return (err < 0) ? err : 0; } @@ -1974,19 +1981,20 @@ static int thread__resolve_callchain_sample(struct thread *thread, root_al, NULL, be[i].to, true, &be[i].flags, - nr_loop_iter, 1); + nr_loop_iter, 1, + be[i].from); else err = add_callchain_ip(thread, cursor, parent, root_al, NULL, be[i].to, true, &be[i].flags, - 0, 0); + 0, 0, be[i].from); if (!err) err = add_callchain_ip(thread, cursor, parent, root_al, NULL, be[i].from, true, &be[i].flags, - 0, 0); + 0, 0, 0); if (err == -EINVAL) break; if (err) @@ -2016,7 +2024,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, - false, NULL, 0, 0); + false, NULL, 0, 0, 0); if (err) return (err < 0) ? err : 0; @@ -2033,7 +2041,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return 0; return callchain_cursor_append(cursor, entry->ip, entry->map, entry->sym, - false, NULL, 0, 0); + false, NULL, 0, 0, 0); } static int thread__resolve_callchain_unwind(struct thread *thread, From 69d8bd8aa7d8906a1e922ae884d97f0bd7f1b269 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 30 Jun 2017 10:16:55 -0400 Subject: [PATCH 087/253] perf intel-pt: Set no_aux_samples for the tracking event The reason of introducing the tracking event (a dummy software event) is to collect side-band information. Additional sampling is wasteful. no_aux_samples should be set for tracking event. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170630141656.1626-1-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 9535be57033f..4a461e8ae326 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -752,6 +752,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, tracking_evsel->attr.freq = 0; tracking_evsel->attr.sample_period = 1; + tracking_evsel->no_aux_samples = true; if (need_immediate) tracking_evsel->immediate = true; From 91a8c5b840f2da31280e14b6268761cf14033756 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 30 Jun 2017 10:16:56 -0400 Subject: [PATCH 088/253] perf intel-pt: Always set no branch for dummy event An earlier kernel patch allowed enabling PT and LBR at the same time on Goldmont. commit ccbebba4c6bf ("perf/x86/intel/pt: Bypass PT vs. LBR exclusivity if the core supports it") However, users still cannot use Intel PT and LBRs simultaneously. $ sudo perf record -e cycles,intel_pt//u -b -- sleep 1 Error: PMU Hardware doesn't support sampling/overflow-interrupts. PT implicitly adds dummy event in perf tool. dummy event is software event which doesn't support LBR. Always setting no branch for dummy event in Intel PT. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170630141656.1626-2-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 4a461e8ae326..db0ba8caf5a2 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -701,6 +701,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, perf_evsel__set_sample_bit(switch_evsel, TID); perf_evsel__set_sample_bit(switch_evsel, TIME); perf_evsel__set_sample_bit(switch_evsel, CPU); + perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK); opts->record_switch_events = false; ptr->have_sched_switch = 3; @@ -762,6 +763,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, /* And the CPU for switch events */ perf_evsel__set_sample_bit(tracking_evsel, CPU); } + perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); } /* From 6f8fe61ee583f1b75fef81d9c9434b7496a01881 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2017 11:39:47 -0300 Subject: [PATCH 089/253] perf trace: Add missing ' = ' in the default formatting of syscall returns We lost it recently, put it back. Before: 789.499 ( 0.001 ms): libvirtd/1175 lseek(fd: 22, whence: CUR) 4328 After: 789.499 ( 0.001 ms): libvirtd/1175 lseek(fd: 22, whence: CUR) = 4328 Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 1f63139c3f8a ("perf trace beauty: Simplify syscall return formatting") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1e4c0657b712..4370ce4f28c7 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1660,7 +1660,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (ret < 0) goto errno_print; signed_print: - fprintf(trace->output, ") %ld", ret); + fprintf(trace->output, ") = %ld", ret); } else if (ret < 0) { errno_print: { char bf[STRERR_BUFSIZE]; From d57da8c9a5e11e7786931cfa1fd744969f530b2e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2017 12:16:15 -0300 Subject: [PATCH 090/253] perf trace beauty mmap: Ignore 'fd' and 'offset' args for MAP_ANONYMOUS Just suppress them, not used by the kernel. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-atpt07y2x9a8ttlwja94ow3j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/mmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index af1cfde6b97b..754558f9009d 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -34,6 +34,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, { int printed = 0, flags = arg->val; + if (flags & MAP_ANONYMOUS) + arg->mask |= (1 << 4) | (1 << 5); /* Mask 4th ('fd') and 5th ('offset') args, ignored */ + #define P_MMAP_FLAG(n) \ if (flags & MAP_##n) { \ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ From 5e58fcfaf4c60795c241be739ad55c519d7f2aaa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2017 14:32:11 -0300 Subject: [PATCH 091/253] perf trace: Allow allocating sc->arg_fmt even without the syscall tracepoint At least "clone" doesn't have (enter, exit) entries tracefs/events/syscalls/, but we can provide a syscall_fmt and use it instead, as will be done for "clone" in the next cset. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-o12kejgcxddyovn2hlg4gbim@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4370ce4f28c7..1c5238af6d14 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1165,22 +1165,31 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) return err; } +static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) +{ + int idx; + + sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt)); + if (sc->arg_fmt == NULL) + return -1; + + for (idx = 0; idx < nr_args; ++idx) { + if (sc->fmt) + sc->arg_fmt[idx] = sc->fmt->arg[idx]; + } + + sc->nr_args = nr_args; + return 0; +} + static int syscall__set_arg_fmts(struct syscall *sc) { struct format_field *field; int idx = 0, len; - sc->arg_fmt = calloc(sc->nr_args, sizeof(*sc->arg_fmt)); - if (sc->arg_fmt == NULL) - return -1; - for (field = sc->args; field; field = field->next, ++idx) { - if (sc->fmt) { - sc->arg_fmt[idx] = sc->fmt->arg[idx]; - - if (sc->fmt->arg[idx].scnprintf) - continue; - } + if (sc->fmt && sc->fmt->arg[idx].scnprintf) + continue; if (strcmp(field->type, "const char *") == 0 && (strcmp(field->name, "filename") == 0 || @@ -1251,11 +1260,13 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->tp_format = trace_event__tp_format("syscalls", tp_name); } + if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields)) + return -1; + if (IS_ERR(sc->tp_format)) return -1; sc->args = sc->tp_format->format.fields; - sc->nr_args = sc->tp_format->format.nr_fields; /* * We need to check and discard the first variable '__syscall_nr' * or 'nr' that mean the syscall number. It is needless here. From d032d79e2dcb56e13678bf2cc7b36957ef827c32 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2017 14:36:13 -0300 Subject: [PATCH 092/253] perf trace: Use the syscall_fmt formatters without a tracepoint Previously we only used the syscall_fmt when we had sc->tp_format set, i.e. when we found the (enter, exit) pair in tracefs/events/syscalls/. But we really only need to use what is in sc->arg_fmt to apply the arg beautifiers to the syscall argument values, so do it. With this we will be able to provide formatters to the "clone" syscall, which doesn't have entries in tracefs/events/syscalls/. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-y41nl41jrayjo5ucnde2peix@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 54 +++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1c5238af6d14..fc4d33a6aa58 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1350,12 +1350,32 @@ unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx) return __syscall_arg__val(arg->args, idx); } +static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, + struct syscall_arg *arg, unsigned long val) +{ + if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) { + arg->val = val; + if (sc->arg_fmt[arg->idx].parm) + arg->parm = sc->arg_fmt[arg->idx].parm; + return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg); + } + return scnprintf(bf, size, "%ld", val); +} + static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, unsigned char *args, struct trace *trace, struct thread *thread) { size_t printed = 0; unsigned long val; + u8 bit = 1; + struct syscall_arg arg = { + .args = args, + .idx = 0, + .mask = 0, + .trace = trace, + .thread = thread, + }; struct thread_trace *ttrace = thread__priv(thread); /* @@ -1367,14 +1387,6 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (sc->args != NULL) { struct format_field *field; - u8 bit = 1; - struct syscall_arg arg = { - .args = args, - .idx = 0, - .mask = 0, - .trace = trace, - .thread = thread, - }; for (field = sc->args; field; field = field->next, ++arg.idx, bit <<= 1) { @@ -1398,15 +1410,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, printed += scnprintf(bf + printed, size - printed, "%s%s: ", printed ? ", " : "", field->name); - if (sc->arg_fmt && sc->arg_fmt[arg.idx].scnprintf) { - arg.val = val; - if (sc->arg_fmt[arg.idx].parm) - arg.parm = sc->arg_fmt[arg.idx].parm; - printed += sc->arg_fmt[arg.idx].scnprintf(bf + printed, size - printed, &arg); - } else { - printed += scnprintf(bf + printed, size - printed, - "%ld", val); - } + printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); } } else if (IS_ERR(sc->tp_format)) { /* @@ -1414,14 +1418,16 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, * may end up not having any args, like with gettid(), so only * print the raw args when we didn't manage to read it. */ - int i = 0; - - while (i < 6) { - val = __syscall_arg__val(args, i); + while (arg.idx < 6) { + if (arg.mask & bit) + goto next_arg; + val = syscall_arg__val(&arg, arg.idx); printed += scnprintf(bf + printed, size - printed, - "%sarg%d: %ld", - printed ? ", " : "", i, val); - ++i; + "%sarg%d: ", printed ? ", " : "", arg.idx); + printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); +next_arg: + ++arg.idx; + bit <<= 1; } } From 325f5091b0da5a7d8d190262661af17d75bee262 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2017 15:02:43 -0300 Subject: [PATCH 093/253] perf trace: Ditch __syscall__arg_val() variant, not needed anymore All callers now can use syscall__arg_val(arg, idx), be it to iterate thru the syscall arguments while taking into account alignment, or to get values for other arguments that affect how the current argument should be formatted (think of fcntl's 'cmd' and 'arg' arguments). Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wm5b156d8kro1r4y3b33eyta@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index fc4d33a6aa58..f8b7bfdf4ee7 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1336,20 +1336,15 @@ static int trace__validate_ev_qualifier(struct trace *trace) * variable to read it. Most notably this avoids extended load instructions * on unaligned addresses */ -static unsigned long __syscall_arg__val(unsigned char *args, u8 idx) +unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx) { unsigned long val; - unsigned char *p = args + sizeof(unsigned long) * idx; + unsigned char *p = arg->args + sizeof(unsigned long) * idx; memcpy(&val, p, sizeof(val)); return val; } -unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx) -{ - return __syscall_arg__val(arg->args, idx); -} - static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, struct syscall_arg *arg, unsigned long val) { From 332337dafc98d88561bf3730f80c59cc93f089e1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2017 15:08:14 -0300 Subject: [PATCH 094/253] perf trace: Allow specifying number of syscall args for tracepointless syscalls When we don't have syscalls:sys_{enter,exit}_NAME, we had to resort to dumping all the 6 syscall arguments, fix it by providing that info for such syscalls, like 'clone'. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-dfq1jtrxj8dqvqoeqqpr3slu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f8b7bfdf4ee7..e1988d08911d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -611,6 +611,7 @@ static struct syscall_fmt { const char *name; const char *alias; struct syscall_arg_fmt arg[6]; + u8 nr_args; bool errpid; bool timeout; bool hexret; @@ -1169,6 +1170,9 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) { int idx; + if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0) + nr_args = sc->fmt->nr_args; + sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt)); if (sc->arg_fmt == NULL) return -1; @@ -1413,7 +1417,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, * may end up not having any args, like with gettid(), so only * print the raw args when we didn't manage to read it. */ - while (arg.idx < 6) { + while (arg.idx < sc->nr_args) { if (arg.mask & bit) goto next_arg; val = syscall_arg__val(&arg, arg.idx); From c51bdfecd782eec710237c53137e0fefd032d287 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2017 15:47:30 -0300 Subject: [PATCH 095/253] perf trace: Allow specifying names to syscall arguments formatters For tracepointless syscalls, like clone, otherwise get them from the tracepoint's /format file. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ml5qvv1w5k96ghwhxpzzsmm3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e1988d08911d..6664293584df 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -604,6 +604,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, struct syscall_arg_fmt { size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); void *parm; + const char *name; bool show_zero; }; @@ -1349,6 +1350,15 @@ unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx) return val; } +static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size, + struct syscall_arg *arg) +{ + if (sc->arg_fmt && sc->arg_fmt[arg->idx].name) + return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name); + + return scnprintf(bf, size, "arg%d: ", arg->idx); +} + static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, struct syscall_arg *arg, unsigned long val) { @@ -1421,8 +1431,9 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (arg.mask & bit) goto next_arg; val = syscall_arg__val(&arg, arg.idx); - printed += scnprintf(bf + printed, size - printed, - "%sarg%d: ", printed ? ", " : "", arg.idx); + if (printed) + printed += scnprintf(bf + printed, size - printed, ", "); + printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg); printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); next_arg: ++arg.idx; From 450c86c9a3301db5165172bcaced73d036f9c582 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 20 Jul 2017 10:46:34 -0300 Subject: [PATCH 096/253] tools include uapi: Grab a copy of linux/sched.h So that we make sure we have recent enough defines for things such as 'perf trace' system call argument beautifiers. For instance, the 'clone' syscall argument 'flag' needs to use CLONE_NEWCGROUP, and that is not available in RHEL7. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-81sln0ng4a2lcxrth14vcov4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/sched.h | 52 ++++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 1 + tools/perf/check-headers.sh | 1 + 3 files changed, 54 insertions(+) create mode 100644 tools/include/uapi/linux/sched.h diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h new file mode 100644 index 000000000000..e2a6c7b3510b --- /dev/null +++ b/tools/include/uapi/linux/sched.h @@ -0,0 +1,52 @@ +#ifndef _UAPI_LINUX_SCHED_H +#define _UAPI_LINUX_SCHED_H + +/* + * cloning flags: + */ +#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ +#define CLONE_VM 0x00000100 /* set if VM shared between processes */ +#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ +#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ +#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ +#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ +#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ +#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ +#define CLONE_THREAD 0x00010000 /* Same thread group? */ +#define CLONE_NEWNS 0x00020000 /* New mount namespace group */ +#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */ +#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */ +#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */ +#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */ +#define CLONE_DETACHED 0x00400000 /* Unused, ignored */ +#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */ +#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */ +#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */ +#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */ +#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */ +#define CLONE_NEWUSER 0x10000000 /* New user namespace */ +#define CLONE_NEWPID 0x20000000 /* New pid namespace */ +#define CLONE_NEWNET 0x40000000 /* New network namespace */ +#define CLONE_IO 0x80000000 /* Clone io context */ + +/* + * Scheduling policies + */ +#define SCHED_NORMAL 0 +#define SCHED_FIFO 1 +#define SCHED_RR 2 +#define SCHED_BATCH 3 +/* SCHED_ISO: reserved but not implemented yet */ +#define SCHED_IDLE 5 +#define SCHED_DEADLINE 6 + +/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ +#define SCHED_RESET_ON_FORK 0x40000000 + +/* + * For the sched_{set,get}attr() calls + */ +#define SCHED_FLAG_RESET_ON_FORK 0x01 +#define SCHED_FLAG_RECLAIM 0x02 + +#endif /* _UAPI_LINUX_SCHED_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index a29da46d180f..9f2267b82eb2 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -78,6 +78,7 @@ tools/include/uapi/linux/fcntl.h tools/include/uapi/linux/hw_breakpoint.h tools/include/uapi/linux/mman.h tools/include/uapi/linux/perf_event.h +tools/include/uapi/linux/sched.h tools/include/uapi/linux/stat.h tools/include/linux/poison.h tools/include/linux/rbtree.h diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 47abd3325190..9b6295809a3b 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -3,6 +3,7 @@ HEADERS=' include/uapi/linux/fcntl.h include/uapi/linux/perf_event.h +include/uapi/linux/sched.h include/uapi/linux/stat.h include/linux/hash.h include/uapi/linux/hw_breakpoint.h From 33396a3a6a6b16784291ac19708e3a6cf85db6c5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2017 16:15:17 -0300 Subject: [PATCH 097/253] perf trace beauty clone: Beautify syscall arguments Now, syswide tracing, selected entries: # trace -e clone 24417.203 ( 0.158 ms): bash/11323 clone(flags: CHILD_CLEARTID|CHILD_SETTID|0x11, child_stack: 0, parent_tidptr: 0, child_tidptr: 0x7f0778e5c9d0, tls: 0x7f0778e5c700) = 11325 (bash) ? ( ? ): bash/11325 ... [continued]: clone()) = 0 24419.355 ( 0.093 ms): bash/10586 clone(flags: CHILD_CLEARTID|CHILD_SETTID|0x11, child_stack: 0, parent_tidptr: 0, child_tidptr: 0x7f0778e5c9d0, tls: 0x7f0778e5c700) = 11326 (bash) ? ( ? ): bash/11326 ... [continued]: clone()) = 0 24419.744 ( 0.102 ms): bash/11326 clone(flags: CHILD_CLEARTID|CHILD_SETTID|0x11, child_stack: 0, parent_tidptr: 0, child_tidptr: 0x7f0778e5c9d0, tls: 0x7f0778e5c700) = 11327 (bash) ? ( ? ): bash/11327 ... [continued]: clone()) = 0 24420.138 ( 0.105 ms): bash/11327 clone(flags: CHILD_CLEARTID|CHILD_SETTID|0x11, child_stack: 0, parent_tidptr: 0, child_tidptr: 0x7f0778e5c9d0, tls: 0x7f0778e5c700) = 11328 (bash) ? ( ? ): bash/11328 ... [continued]: clone()) = 0 35747.722 ( 0.044 ms): gpg-agent/18087 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7ff0755f6ff0, parent_tidptr: 0x7ff0755f79d0, child_tidptr: 0x7ff0755f79d0, tls: 0x7ff0755f7700) = 11329 (gpg-agent) ? ( ? ): gpg-agent/11329 ... [continued]: clone()) = 0 35748.359 ( 0.022 ms): gpg-agent/18087 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7ff075df7ff0, parent_tidptr: 0x7ff075df89d0, child_tidptr: 0x7ff075df89d0, tls: 0x7ff075df8700) = 11330 (gpg-agent) ? ( ? ): gpg-agent/11330 ... [continued]: clone()) = 0 35781.422 ( 0.452 ms): NetworkManager/1112 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f2f1fffedb0, parent_tidptr: 0x7f2f1ffff9d0, child_tidptr: 0x7f2f1ffff9d0, tls: 0x7f2f1ffff700) = 11331 (NetworkManager) ? ( ? ): NetworkManager/11331 ... [continued]: clone()) = 0 Need to improve the formatting of the second return, to the child, this cset only focused on the argument formatting. If we trace just one pid: # trace -e clone -p 19863 0.349 ( 0.025 ms): Chrome_IOThrea/19863 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7ffb84eaac70, parent_tidptr: 0x7ffb84eab9d0, child_tidptr: 0x7ffb84eab9d0, tls: 0x7ffb84eab700) = 11637 (Chrome_IOThread) 0.392 ( 0.013 ms): Chrome_IOThrea/19863 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7ffb664b8c70, parent_tidptr: 0x7ffb664b99d0, child_tidptr: 0x7ffb664b99d0, tls: 0x7ffb664b9700) = 11638 (Chrome_IOThread) 0.573 ( 0.015 ms): Chrome_IOThrea/19863 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7ffb6046cc70, parent_tidptr: 0x7ffb6046d9d0, child_tidptr: 0x7ffb6046d9d0, tls: 0x7ffb6046d700) = 11639 (Chrome_IOThread) 0.617 ( 0.014 ms): Chrome_IOThrea/19863 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7ffb730dcc70, parent_tidptr: 0x7ffb730dd9d0, child_tidptr: 0x7ffb730dd9d0, tls: 0x7ffb730dd700) = 11640 (Chrome_IOThread) 4.350 ( 0.065 ms): Chrome_IOThrea/19863 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7ffb720d9c70, parent_tidptr: 0x7ffb720da9d0, child_tidptr: 0x7ffb720da9d0, tls: 0x7ffb720da700) = 11642 (Chrome_IOThread) 5.642 ( 0.079 ms): Chrome_IOThrea/19863 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7ffb718d8c70, parent_tidptr: 0x7ffb718d99d0, child_tidptr: 0x7ffb718d99d0, tls: 0x7ffb718d9700) = 11643 (Chrome_IOThread) ^C# We'll also have to fix the argument ordering in different arches, probably having multiple syscall_fmt entries with each possible order and then use perf_evsel__env_arch() (if dealing with a perf.data file) or the current system info, for live sessions. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-am068uyubgj83snepolwhbfe@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 7 +++- tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 3 ++ tools/perf/trace/beauty/clone.c | 58 ++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 tools/perf/trace/beauty/clone.c diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6664293584df..9e74e675d7cb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -626,7 +626,12 @@ static struct syscall_fmt { .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, }, { .name = "clock_gettime", .arg = { [0] = STRARRAY(clk_id, clockid), }, }, - { .name = "clone", .errpid = true, }, + { .name = "clone", .errpid = true, .nr_args = 5, + .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, }, + [1] = { .name = "child_stack", .scnprintf = SCA_HEX, }, + [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, }, + [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, }, + [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, }, { .name = "close", .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, }, { .name = "epoll_ctl", diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index c9e215b806f1..eaa1e8e8e100 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -1,2 +1,3 @@ +libperf-y += clone.o libperf-y += fcntl.o libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index f75ef7d0b303..69a5c8a2d420 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -66,6 +66,9 @@ size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *ar size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg); #define SCA_PID syscall_arg__scnprintf_pid +size_t syscall_arg__scnprintf_clone_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_CLONE_FLAGS syscall_arg__scnprintf_clone_flags + size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FCNTL_CMD syscall_arg__scnprintf_fcntl_cmd diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c new file mode 100644 index 000000000000..ab982a7c63bb --- /dev/null +++ b/tools/perf/trace/beauty/clone.c @@ -0,0 +1,58 @@ +/* + * trace/beauty/cone.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include +#include +#include + +static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size) +{ + int printed = 0; + +#define P_FLAG(n) \ + if (flags & CLONE_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~CLONE_##n; \ + } + + P_FLAG(VM); + P_FLAG(FS); + P_FLAG(FILES); + P_FLAG(SIGHAND); + P_FLAG(PTRACE); + P_FLAG(VFORK); + P_FLAG(PARENT); + P_FLAG(THREAD); + P_FLAG(NEWNS); + P_FLAG(SYSVSEM); + P_FLAG(SETTLS); + P_FLAG(PARENT_SETTID); + P_FLAG(CHILD_CLEARTID); + P_FLAG(DETACHED); + P_FLAG(UNTRACED); + P_FLAG(CHILD_SETTID); + P_FLAG(NEWCGROUP); + P_FLAG(NEWUTS); + P_FLAG(NEWIPC); + P_FLAG(NEWUSER); + P_FLAG(NEWPID); + P_FLAG(NEWNET); + P_FLAG(IO); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +size_t syscall_arg__scnprintf_clone_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + return clone__scnprintf_flags(arg->val, bf, size); +} From 15bed2742a8d60210b958963ca1091d3cfc4f332 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 19 Jul 2017 16:52:59 -0300 Subject: [PATCH 098/253] perf trace beauty clone: Suppress unused args according to 'flags' arg The 'parent_tidptr', 'child_tidptr' and 'tls' arguments to the 'clone' syscall are only used when certain flags are set in 'flags', suppress them when those aren't there. E.g: 9886.919 (0.236 ms): fetchmail/19298 clone(flags: CHILD_CLEARTID|CHILD_SETTID|0x11, child_stack: 0, child_tidptr: 0x7fe43f468590) = 19608 (fetchmail) 12876.052 (0.249 ms): qemu-system-x8/21238 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f48117fc770, parent_tidptr: 0x7f48117ff9d0, child_tidptr: 0x7f48117ff9d0, tls: 0x7f48117ff700) = 19611 (qemu-system-x86) 12876.555 (0.048 ms): worker/19611 clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7f480f7f8770, parent_tidptr: 0x7f480f7fb9d0, child_tidptr: 0x7f480f7fb9d0, tls: 0x7f480f7fb700) = 19612 (worker) 16575.240 (0.469 ms): fetchmail/19298 clone(flags: CHILD_CLEARTID|CHILD_SETTID|0x11, child_stack: 0, child_tidptr: 0x7fe43f468590) = 19613 (fetchmail) 20797.270 (0.335 ms): fetchmail/19298 clone(flags: CHILD_CLEARTID|CHILD_SETTID|0x11, child_stack: 0, child_tidptr: 0x7fe43f468590) = 19614 (fetchmail) 21228.585 (0.501 ms): vim/19519 clone(flags: CHILD_CLEARTID|CHILD_SETTID|0x11, child_stack: 0, child_tidptr: 0x7fbad6ac27d0) = 19615 (vim) 21232.193 (0.137 ms): bash/19615 clone(flags: CHILD_CLEARTID|CHILD_SETTID|0x11, child_stack: 0, child_tidptr: 0x7fad8bff49d0) = 19616 (bash) Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-0um93djul9knf239gwa5mpcb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/clone.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c index ab982a7c63bb..d64d049ab991 100644 --- a/tools/perf/trace/beauty/clone.c +++ b/tools/perf/trace/beauty/clone.c @@ -54,5 +54,22 @@ static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size) size_t syscall_arg__scnprintf_clone_flags(char *bf, size_t size, struct syscall_arg *arg) { - return clone__scnprintf_flags(arg->val, bf, size); + unsigned long flags = arg->val; + enum syscall_clone_args { + SCC_FLAGS = (1 << 0), + SCC_CHILD_STACK = (1 << 1), + SCC_PARENT_TIDPTR = (1 << 2), + SCC_CHILD_TIDPTR = (1 << 3), + SCC_TLS = (1 << 4), + }; + if (!(flags & CLONE_PARENT_SETTID)) + arg->mask |= SCC_PARENT_TIDPTR; + + if (!(flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID))) + arg->mask |= SCC_CHILD_TIDPTR; + + if (!(flags & CLONE_SETTLS)) + arg->mask |= SCC_TLS; + + return clone__scnprintf_flags(flags, bf, size); } From dd1a50377c92321f78fa4d0601bb4d88d88670ab Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 20 Jul 2017 11:17:51 -0300 Subject: [PATCH 099/253] perf trace: Introduce filter_loop_pids() No change in functionality, just to make clearer that what we want when filtering the tracer pid in a system wide tracing session is to avoid a feedback loop. This also paves the way for a more interesting loop avoidance algorithm, one that tries to figure out if we are in a ssh session, xterm, etc. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-5fcttc5kdjkcyp9404ezkuy9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 9e74e675d7cb..0ba36f08efd8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2239,6 +2239,16 @@ static int trace__set_ev_qualifier_filter(struct trace *trace) goto out; } +static int trace__set_filter_loop_pids(struct trace *trace) +{ + int nr = 1; + pid_t pids[32] = { + getpid(), + }; + + return perf_evlist__set_filter_pids(trace->evlist, nr, pids); +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = trace->evlist; @@ -2362,7 +2372,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->filter_pids.nr > 0) err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); else if (thread_map__pid(evlist->threads, 0) == -1) - err = perf_evlist__set_filter_pid(evlist, getpid()); + err = trace__set_filter_loop_pids(trace); if (err < 0) goto out_error_mem; From 082ab9a18e532864d1ceecfb50221df62b1d5a92 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 20 Jul 2017 11:32:05 -0300 Subject: [PATCH 100/253] perf trace: Filter out 'sshd' in the tracer ancestry in syswide tracing Avoiding a loop, so now its quite convenient to ssh to a machine and then simply do: # perf trace To trace all syscalls without causing a loop. This was possible using --filter-pids, i.e. once you noticed the loop, get the sshd pid and add it to --filter-pids, restarting the 'perf trace'. Now to figure out how to do that in a X terminal, the other common scenario, which is way more involved, as there are multiple processes communicating to process terminal activity... Using --filter-pids + '-e \!syscall,names,you,dont,need' may be a good approximation when having to do syswide tracing on your workstation. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-68rjeao9wnpylla41htk7xps@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0ba36f08efd8..05d24b6570ee 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2241,10 +2241,24 @@ static int trace__set_ev_qualifier_filter(struct trace *trace) static int trace__set_filter_loop_pids(struct trace *trace) { - int nr = 1; + unsigned int nr = 1; pid_t pids[32] = { getpid(), }; + struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]); + + while (thread && nr < ARRAY_SIZE(pids)) { + struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid); + + if (parent == NULL) + break; + + if (!strcmp(thread__comm_str(parent), "sshd")) { + pids[nr++] = parent->tid; + break; + } + thread = parent; + } return perf_evlist__set_filter_pids(trace->evlist, nr, pids); } From 8e99b6d4533cf3f49dcd813155a513a5b572baef Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 20 Jul 2017 15:27:39 -0300 Subject: [PATCH 101/253] tools include: Adopt strstarts() from the kernel Replacing prefixcmp(), same purpose, inverted result, so standardize on the kernel variant, to reduce silly differences among tools/ and the kernel sources, making it easier for people to work in both codebases. And then doing: if (strstarts(option, "no-")) Looks clearer than doing: if (!prefixcmp(option, "no-")) To figure out if option starts witn "no-". Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Rusty Russell Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-kaei42gi7lpa8subwtv7eug8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/string.h | 12 ++++++++++-- tools/lib/string.c | 9 --------- tools/lib/subcmd/help.c | 2 +- tools/lib/subcmd/parse-options.c | 18 +++++++++--------- tools/perf/builtin-config.c | 3 ++- tools/perf/builtin-ftrace.c | 2 +- tools/perf/builtin-help.c | 6 +++--- tools/perf/perf.c | 16 ++++++++-------- tools/perf/ui/browser.c | 3 ++- tools/perf/ui/browsers/annotate.c | 3 ++- tools/perf/ui/stdio/hist.c | 3 ++- tools/perf/util/bpf-loader.c | 2 +- tools/perf/util/callchain.c | 2 +- tools/perf/util/config.c | 13 +++++++------ tools/perf/util/llvm-utils.c | 2 +- 15 files changed, 50 insertions(+), 46 deletions(-) diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index d62b56cf8c12..a30fad536f52 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -1,8 +1,8 @@ #ifndef _TOOLS_LINUX_STRING_H_ #define _TOOLS_LINUX_STRING_H_ - #include /* for size_t */ +#include void *memdup(const void *src, size_t len); @@ -18,6 +18,14 @@ extern size_t strlcpy(char *dest, const char *src, size_t size); char *str_error_r(int errnum, char *buf, size_t buflen); -int prefixcmp(const char *str, const char *prefix); +/** + * strstarts - does @str start with @prefix? + * @str: string to examine + * @prefix: prefix to look for. + */ +static inline bool strstarts(const char *str, const char *prefix) +{ + return strncmp(str, prefix, strlen(prefix)) == 0; +} #endif /* _LINUX_STRING_H_ */ diff --git a/tools/lib/string.c b/tools/lib/string.c index 8e678af1c6ee..bd239bc1d557 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -87,12 +87,3 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size) } return ret; } - -int prefixcmp(const char *str, const char *prefix) -{ - for (; ; str++, prefix++) - if (!*prefix) - return 0; - else if (*str != *prefix) - return (unsigned char)*prefix - (unsigned char)*str; -} diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index ba970a73d053..0310520f918e 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -171,7 +171,7 @@ static void list_commands_in_dir(struct cmdnames *cmds, while ((de = readdir(dir)) != NULL) { int entlen; - if (prefixcmp(de->d_name, prefix)) + if (!strstarts(de->d_name, prefix)) continue; astrcat(&buf, de->d_name); diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index 359bfa77f39c..2bd6fd0c1d40 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -368,7 +368,7 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, return 0; } if (!rest) { - if (!prefixcmp(options->long_name, "no-")) { + if (strstarts(options->long_name, "no-")) { /* * The long name itself starts with "no-", so * accept the option without "no-" so that users @@ -381,7 +381,7 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, goto match; } /* Abbreviated case */ - if (!prefixcmp(options->long_name + 3, arg)) { + if (strstarts(options->long_name + 3, arg)) { flags |= OPT_UNSET; goto is_abbreviated; } @@ -406,7 +406,7 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, continue; } /* negated and abbreviated very much? */ - if (!prefixcmp("no-", arg)) { + if (strstarts("no-", arg)) { flags |= OPT_UNSET; goto is_abbreviated; } @@ -416,7 +416,7 @@ static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, flags |= OPT_UNSET; rest = skip_prefix(arg + 3, options->long_name); /* abbreviated and negated? */ - if (!rest && !prefixcmp(options->long_name, arg + 3)) + if (!rest && strstarts(options->long_name, arg + 3)) goto is_abbreviated; if (!rest) continue; @@ -456,7 +456,7 @@ static void check_typos(const char *arg, const struct option *options) if (strlen(arg) < 3) return; - if (!prefixcmp(arg, "no-")) { + if (strstarts(arg, "no-")) { fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg); exit(129); } @@ -464,7 +464,7 @@ static void check_typos(const char *arg, const struct option *options) for (; options->type != OPTION_END; options++) { if (!options->long_name) continue; - if (!prefixcmp(options->long_name, arg)) { + if (strstarts(options->long_name, arg)) { fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg); exit(129); } @@ -933,10 +933,10 @@ int parse_options_usage(const char * const *usagestr, if (opts->long_name == NULL) continue; - if (!prefixcmp(opts->long_name, optstr)) + if (strstarts(opts->long_name, optstr)) print_option_help(opts, 0); - if (!prefixcmp("no-", optstr) && - !prefixcmp(opts->long_name, optstr + 3)) + if (strstarts("no-", optstr) && + strstarts(opts->long_name, optstr + 3)) print_option_help(opts, 0); } diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index ece45582a48d..3ddcc6e2abeb 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -13,6 +13,7 @@ #include "util/util.h" #include "util/debug.h" #include "util/config.h" +#include static bool use_system_config, use_user_config; @@ -79,7 +80,7 @@ static int show_spec_config(struct perf_config_set *set, const char *var) return -1; perf_config_items__for_each_entry(&set->sections, section) { - if (prefixcmp(var, section->name) != 0) + if (!strstarts(var, section->name)) continue; perf_config_items__for_each_entry(§ion->items, item) { diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index dd26c62c9893..25a42acabee1 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -381,7 +381,7 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb) { struct perf_ftrace *ftrace = cb; - if (prefixcmp(var, "ftrace.")) + if (!strstarts(var, "ftrace.")) return 0; if (strcmp(var, "ftrace.tracer")) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 530a7f2fa0f3..dbe4e4153bcf 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -90,7 +90,7 @@ static int check_emacsclient_version(void) */ finish_command(&ec_process); - if (prefixcmp(buffer.buf, "emacsclient")) { + if (!strstarts(buffer.buf, "emacsclient")) { fprintf(stderr, "Failed to parse emacsclient version.\n"); goto out; } @@ -283,7 +283,7 @@ static int perf_help_config(const char *var, const char *value, void *cb) add_man_viewer(value); return 0; } - if (!prefixcmp(var, "man.")) + if (!strstarts(var, "man.")) return add_man_viewer_info(var, value); return 0; @@ -313,7 +313,7 @@ static const char *cmd_to_page(const char *perf_cmd) if (!perf_cmd) return "perf"; - else if (!prefixcmp(perf_cmd, "perf")) + else if (!strstarts(perf_cmd, "perf")) return perf_cmd; return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 628a5e412cb1..e0279babe0c0 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -89,7 +89,7 @@ struct pager_config { static int pager_command_config(const char *var, const char *value, void *data) { struct pager_config *c = data; - if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd)) + if (strstarts(var, "pager.") && !strcmp(var + 6, c->cmd)) c->val = perf_config_bool(var, value); return 0; } @@ -108,9 +108,9 @@ static int check_pager_config(const char *cmd) static int browser_command_config(const char *var, const char *value, void *data) { struct pager_config *c = data; - if (!prefixcmp(var, "tui.") && !strcmp(var + 4, c->cmd)) + if (strstarts(var, "tui.") && !strcmp(var + 4, c->cmd)) c->val = perf_config_bool(var, value); - if (!prefixcmp(var, "gtk.") && !strcmp(var + 4, c->cmd)) + if (strstarts(var, "gtk.") && !strcmp(var + 4, c->cmd)) c->val = perf_config_bool(var, value) ? 2 : 0; return 0; } @@ -192,7 +192,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) /* * Check remaining flags. */ - if (!prefixcmp(cmd, CMD_EXEC_PATH)) { + if (strstarts(cmd, CMD_EXEC_PATH)) { cmd += strlen(CMD_EXEC_PATH); if (*cmd == '=') set_argv_exec_path(cmd + 1); @@ -229,7 +229,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) *envchanged = 1; (*argv)++; (*argc)--; - } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { + } else if (strstarts(cmd, CMD_DEBUGFS_DIR)) { tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR)); fprintf(stderr, "dir: %s\n", tracing_path); if (envchanged) @@ -470,14 +470,14 @@ int main(int argc, const char **argv) * So we just directly call the internal command handler, and * die if that one cannot handle it. */ - if (!prefixcmp(cmd, "perf-")) { + if (strstarts(cmd, "perf-")) { cmd += 5; argv[0] = cmd; handle_internal_command(argc, argv); fprintf(stderr, "cannot handle %s internally", cmd); goto out; } - if (!prefixcmp(cmd, "trace")) { + if (strstarts(cmd, "trace")) { #ifdef HAVE_LIBAUDIT_SUPPORT setup_path(); argv[0] = "trace"; @@ -495,7 +495,7 @@ int main(int argc, const char **argv) commit_pager_choice(); if (argc > 0) { - if (!prefixcmp(argv[0], "--")) + if (strstarts(argv[0], "--")) argv[0] += 2; } else { /* The user didn't specify a command; give them help */ diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index f73f3f13e01d..d0c2007c307b 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include "browser.h" @@ -563,7 +564,7 @@ static int ui_browser__color_config(const char *var, const char *value, int i; /* same dir for all commands */ - if (prefixcmp(var, "colors.") != 0) + if (!strstarts(var, "colors.") != 0) return 0; for (i = 0; ui_browser__colorsets[i].name != NULL; ++i) { diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 8d3f6f53c122..6794a8bec404 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -13,6 +13,7 @@ #include #include #include +#include #include struct disasm_line_samples { @@ -1198,7 +1199,7 @@ static int annotate__config(const char *var, const char *value, struct annotate_config *cfg; const char *name; - if (prefixcmp(var, "annotate.") != 0) + if (!strstarts(var, "annotate.")) return 0; name = var + 9; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 2df8eb1ed3c0..5c95b8301c67 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -1,4 +1,5 @@ #include +#include #include "../../util/util.h" #include "../../util/hist.h" @@ -292,7 +293,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, * displayed twice. */ if (!i++ && field_order == NULL && - sort_order && !prefixcmp(sort_order, "sym")) + sort_order && strstarts(sort_order, "sym")) continue; if (!printed) { diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 4bd2d1d882af..4a1264c66101 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1246,7 +1246,7 @@ int bpf__config_obj(struct bpf_object *obj, if (!obj || !term || !term->config) return -EINVAL; - if (!prefixcmp(term->config, "map:")) { + if (strstarts(term->config, "map:")) { key_scan_pos = sizeof("map:") - 1; err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos); goto out; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 22d413ae6025..02130e2e72c7 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -304,7 +304,7 @@ int perf_callchain_config(const char *var, const char *value) { char *endptr; - if (prefixcmp(var, "call-graph.")) + if (!strstarts(var, "call-graph.")) return 0; var += sizeof("call-graph.") - 1; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 31a7dea248d0..bc75596f9e79 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "sane_ctype.h" @@ -433,22 +434,22 @@ static int perf_ui_config(const char *var, const char *value) int perf_default_config(const char *var, const char *value, void *dummy __maybe_unused) { - if (!prefixcmp(var, "core.")) + if (strstarts(var, "core.")) return perf_default_core_config(var, value); - if (!prefixcmp(var, "hist.")) + if (strstarts(var, "hist.")) return perf_hist_config(var, value); - if (!prefixcmp(var, "ui.")) + if (strstarts(var, "ui.")) return perf_ui_config(var, value); - if (!prefixcmp(var, "call-graph.")) + if (strstarts(var, "call-graph.")) return perf_callchain_config(var, value); - if (!prefixcmp(var, "llvm.")) + if (strstarts(var, "llvm.")) return perf_llvm_config(var, value); - if (!prefixcmp(var, "buildid.")) + if (strstarts(var, "buildid.")) return perf_buildid_config(var, value); /* Add other config variables here. */ diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index c6a15f204c03..209b0c82eff4 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -33,7 +33,7 @@ struct llvm_param llvm_param = { int perf_llvm_config(const char *var, const char *value) { - if (prefixcmp(var, "llvm.")) + if (!strstarts(var, "llvm.")) return 0; var += sizeof("llvm.") - 1; From b99e4850df86dfd9dc2cf3f3494e403ff8b46876 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 20 Jul 2017 15:35:33 -0300 Subject: [PATCH 102/253] tools lib: Update copy of strtobool from the kernel sources Getting support for "on", "off" introduced in a81a5a17d44b ("lib: add "on"/"off" support to kstrtobool") and making it check for NULL, introduced in ef951599074b ("lib: move strtobool() to kstrtobool()"). Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Kees Cook Link: http://lkml.kernel.org/n/tip-mu8ghin4rklacmmubzwv8td7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/string.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/tools/lib/string.c b/tools/lib/string.c index bd239bc1d557..a4246f14ded1 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -39,27 +39,45 @@ void *memdup(const void *src, size_t len) * @s: input string * @res: result * - * This routine returns 0 iff the first character is one of 'Yy1Nn0'. - * Otherwise it will return -EINVAL. Value pointed to by res is - * updated upon finding a match. + * This routine returns 0 iff the first character is one of 'Yy1Nn0', or + * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value + * pointed to by res is updated upon finding a match. */ int strtobool(const char *s, bool *res) { + if (!s) + return -EINVAL; + switch (s[0]) { case 'y': case 'Y': case '1': *res = true; - break; + return 0; case 'n': case 'N': case '0': *res = false; - break; + return 0; + case 'o': + case 'O': + switch (s[1]) { + case 'n': + case 'N': + *res = true; + return 0; + case 'f': + case 'F': + *res = false; + return 0; + default: + break; + } default: - return -EINVAL; + break; } - return 0; + + return -EINVAL; } /** From 896bccd3cb8d95cbc565687715516009c5169e71 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 20 Jul 2017 06:36:45 +0900 Subject: [PATCH 103/253] perf annotate: Introduce struct sym_hist_entry struct sym_hist has addr[] but it should have not only number of samples but also the sample period. So use new struct symhist_entry to pave the way to have that. Committer notes: This initial patch will only introduce the struct sym_hist_entry and use only the nr_samples member, which makes the code clearer and paves the way to save the period as well. Signed-off-by: Taeung Song Suggested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1500500205-16553-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 6 ++--- tools/perf/ui/gtk/annotate.c | 4 +-- tools/perf/util/annotate.c | 45 ++++++++++++++++--------------- tools/perf/util/annotate.h | 9 +++++-- 4 files changed, 35 insertions(+), 29 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 6794a8bec404..dbe4e630b90f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -450,14 +450,14 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, next = disasm__get_next_ip_line(¬es->src->source, pos); for (i = 0; i < browser->nr_events; i++) { - u64 nr_samples; + struct sym_hist_entry sample; bpos->samples[i].percent = disasm__calc_percent(notes, evsel->idx + i, pos->offset, next ? next->offset : len, - &path, &nr_samples); - bpos->samples[i].nr = nr_samples; + &path, &sample); + bpos->samples[i].nr = sample.nr_samples; if (max_percent < bpos->samples[i].percent) max_percent = bpos->samples[i].percent; diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 87e3760624f2..d736fd57ab9b 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -34,10 +34,10 @@ static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym, return 0; symhist = annotation__histogram(symbol__annotation(sym), evidx); - if (!symbol_conf.event_group && !symhist->addr[dl->offset]) + if (!symbol_conf.event_group && !symhist->addr[dl->offset].nr_samples) return 0; - percent = 100.0 * symhist->addr[dl->offset] / symhist->sum; + percent = 100.0 * symhist->addr[dl->offset].nr_samples / symhist->sum; markup = perf_gtk__get_percent_color(percent); if (markup) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1742510f0120..c3829555ce1c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -610,10 +610,10 @@ int symbol__alloc_hist(struct symbol *sym) size_t sizeof_sym_hist; /* Check for overflow when calculating sizeof_sym_hist */ - if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(u64)) + if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry)) return -1; - sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(u64)); + sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry)); /* Check for overflow in zalloc argument */ if (sizeof_sym_hist > (SIZE_MAX - sizeof(*notes->src)) @@ -714,11 +714,11 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, offset = addr - sym->start; h = annotation__histogram(notes, evidx); h->sum++; - h->addr[offset]++; + h->addr[offset].nr_samples++; pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64 ", evidx=%d] => %" PRIu64 "\n", sym->start, sym->name, - addr, addr - sym->start, evidx, h->addr[offset]); + addr, addr - sym->start, evidx, h->addr[offset].nr_samples); return 0; } @@ -928,11 +928,12 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa } double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, u64 *nr_samples) + s64 end, const char **path, struct sym_hist_entry *sample) { struct source_line *src_line = notes->src->lines; double percent = 0.0; - *nr_samples = 0; + + sample->nr_samples = 0; if (src_line) { size_t sizeof_src_line = sizeof(*src_line) + @@ -946,7 +947,7 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, *path = src_line->path; percent += src_line->samples[evidx].percent; - *nr_samples += src_line->samples[evidx].nr; + sample->nr_samples += src_line->samples[evidx].nr; offset++; } } else { @@ -954,10 +955,10 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, unsigned int hits = 0; while (offset < end) - hits += h->addr[offset++]; + hits += h->addr[offset++].nr_samples; if (h->sum) { - *nr_samples = hits; + sample->nr_samples = hits; percent = 100.0 * hits / h->sum; } } @@ -1057,10 +1058,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (dl->offset != -1) { const char *path = NULL; - u64 nr_samples; double percent, max_percent = 0.0; double *ppercents = &percent; - u64 *psamples = &nr_samples; + struct sym_hist_entry sample; + struct sym_hist_entry *psamples = &sample; int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); @@ -1074,7 +1075,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (perf_evsel__is_group_event(evsel)) { nr_percent = evsel->nr_members; ppercents = calloc(nr_percent, sizeof(double)); - psamples = calloc(nr_percent, sizeof(u64)); + psamples = calloc(nr_percent, sizeof(struct sym_hist_entry)); if (ppercents == NULL || psamples == NULL) { return -1; } @@ -1085,10 +1086,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st notes->src->lines ? i : evsel->idx + i, offset, next ? next->offset : (s64) len, - &path, &nr_samples); + &path, &sample); ppercents[i] = percent; - psamples[i] = nr_samples; + psamples[i] = sample; if (percent > max_percent) max_percent = percent; } @@ -1126,12 +1127,12 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st for (i = 0; i < nr_percent; i++) { percent = ppercents[i]; - nr_samples = psamples[i]; + sample = psamples[i]; color = get_percent_color(percent); if (symbol_conf.show_total_period) color_fprintf(stdout, color, " %7" PRIu64, - nr_samples); + sample.nr_samples); else color_fprintf(stdout, color, " %7.2f", percent); } @@ -1147,7 +1148,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (ppercents != &percent) free(ppercents); - if (psamples != &nr_samples) + if (psamples != &sample) free(psamples); } else if (max_lines && printed >= max_lines) @@ -1702,7 +1703,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, double percent = 0.0; h = annotation__histogram(notes, evidx + k); - nr_samples = h->addr[i]; + nr_samples = h->addr[i].nr_samples; if (h->sum) percent = 100.0 * nr_samples / h->sum; @@ -1773,9 +1774,9 @@ static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel) u64 len = symbol__size(sym), offset; for (offset = 0; offset < len; ++offset) - if (h->addr[offset] != 0) + if (h->addr[offset].nr_samples != 0) printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2, - sym->start + offset, h->addr[offset]); + sym->start + offset, h->addr[offset].nr_samples); printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum); } @@ -1878,8 +1879,8 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) h->sum = 0; for (offset = 0; offset < len; ++offset) { - h->addr[offset] = h->addr[offset] * 7 / 8; - h->sum += h->addr[offset]; + h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8; + h->sum += h->addr[offset].nr_samples; } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index bac698d7cc6a..3a176633b324 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -74,16 +74,21 @@ static inline bool disasm_line__has_offset(const struct disasm_line *dl) return dl->ops.target.offset_avail; } +struct sym_hist_entry { + u64 nr_samples; + u64 period; +}; + void disasm_line__free(struct disasm_line *dl); struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, u64 *nr_samples); + s64 end, const char **path, struct sym_hist_entry *sample); struct sym_hist { u64 sum; - u64 addr[0]; + struct sym_hist_entry addr[0]; }; struct cyc_hist { From 8158683da3d30e0346275702a8e08f2b22726c45 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 20 Jul 2017 06:36:51 +0900 Subject: [PATCH 104/253] perf annotate: Rename 'sum' to 'nr_samples' in struct sym_hist To make it more clear that it is the sum of all the nr_samples fields in the addr[] entries, i.e.: sym_hist->nr_samples = sum(sym_hist->addr[0 .. symbol__size(sym)]->nr_samples) Committer notes: Taeung had renamed it to total_samples, but using nr_samples, as in the added explanation above, looks clearer and establishes the direct connection, making clear it is about the _number_ of samples. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1500500211-16599-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/annotate.c | 2 +- tools/perf/util/annotate.c | 26 +++++++++++++------------- tools/perf/util/annotate.h | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index d736fd57ab9b..02176193f427 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -37,7 +37,7 @@ static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym, if (!symbol_conf.event_group && !symhist->addr[dl->offset].nr_samples) return 0; - percent = 100.0 * symhist->addr[dl->offset].nr_samples / symhist->sum; + percent = 100.0 * symhist->addr[dl->offset].nr_samples / symhist->nr_samples; markup = perf_gtk__get_percent_color(percent); if (markup) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index c3829555ce1c..58c6b63ff049 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -713,7 +713,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, offset = addr - sym->start; h = annotation__histogram(notes, evidx); - h->sum++; + h->nr_samples++; h->addr[offset].nr_samples++; pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64 @@ -957,9 +957,9 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, while (offset < end) hits += h->addr[offset++].nr_samples; - if (h->sum) { + if (h->nr_samples) { sample->nr_samples = hits; - percent = 100.0 * hits / h->sum; + percent = 100.0 * hits / h->nr_samples; } } @@ -1672,19 +1672,19 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, struct sym_hist *h = annotation__histogram(notes, evidx); struct rb_root tmp_root = RB_ROOT; int nr_pcnt = 1; - u64 h_sum = h->sum; + u64 nr_samples = h->nr_samples; size_t sizeof_src_line = sizeof(struct source_line); if (perf_evsel__is_group_event(evsel)) { for (i = 1; i < evsel->nr_members; i++) { h = annotation__histogram(notes, evidx + i); - h_sum += h->sum; + nr_samples += h->nr_samples; } nr_pcnt = evsel->nr_members; sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples); } - if (!h_sum) + if (!nr_samples) return 0; src_line = notes->src->lines = calloc(len, sizeof_src_line); @@ -1694,7 +1694,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, start = map__rip_2objdump(map, sym->start); for (i = 0; i < len; i++) { - u64 offset, nr_samples; + u64 offset; double percent_max = 0.0; src_line->nr_pcnt = nr_pcnt; @@ -1704,8 +1704,8 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, h = annotation__histogram(notes, evidx + k); nr_samples = h->addr[i].nr_samples; - if (h->sum) - percent = 100.0 * nr_samples / h->sum; + if (h->nr_samples) + percent = 100.0 * nr_samples / h->nr_samples; if (percent > percent_max) percent_max = percent; @@ -1777,7 +1777,7 @@ static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel) if (h->addr[offset].nr_samples != 0) printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2, sym->start + offset, h->addr[offset].nr_samples); - printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum); + printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples); } int symbol__annotate_printf(struct symbol *sym, struct map *map, @@ -1813,7 +1813,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, width *= evsel->nr_members; graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n", - width, width, "Percent", d_filename, evsel_name, h->sum); + width, width, "Percent", d_filename, evsel_name, h->nr_samples); printf("%-*.*s----\n", graph_dotted_len, graph_dotted_len, graph_dotted_line); @@ -1877,10 +1877,10 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) struct sym_hist *h = annotation__histogram(notes, evidx); int len = symbol__size(sym), offset; - h->sum = 0; + h->nr_samples = 0; for (offset = 0; offset < len; ++offset) { h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8; - h->sum += h->addr[offset].nr_samples; + h->nr_samples += h->addr[offset].nr_samples; } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 3a176633b324..e8c246ec53df 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -87,7 +87,7 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, s64 end, const char **path, struct sym_hist_entry *sample); struct sym_hist { - u64 sum; + u64 nr_samples; struct sym_hist_entry addr[0]; }; From bab89f6aed7e745893e009014354d0caaf62acf7 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 20 Jul 2017 16:28:53 -0300 Subject: [PATCH 105/253] perf hists: Pass perf_sample to __symbol__inc_addr_samples() To pave the way to use perf_sample fields in the annotate code, storing sample->period in sym_hist->addr->period and its sum in sym_hist->period. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1500500215-16646-1-git-send-email-treeze.taeung@gmail.com [ split and adjusted from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 15 ++++++++------- tools/perf/builtin-top.c | 5 +++-- tools/perf/util/annotate.c | 18 +++++++++++------- tools/perf/util/annotate.h | 6 ++++-- 5 files changed, 27 insertions(+), 19 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 5205408e795b..96fe1a88c1e5 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -184,7 +184,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, if (he == NULL) return -ENOMEM; - ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + ret = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); hists__inc_nr_samples(hists, true); return ret; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cea25d03f4dd..983b238d5eea 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -115,37 +115,38 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, struct report *rep = arg; struct hist_entry *he = iter->he; struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; struct mem_info *mi; struct branch_info *bi; if (!ui__has_annotation()) return 0; - hist__account_cycles(iter->sample->branch_stack, al, iter->sample, + hist__account_cycles(sample->branch_stack, al, sample, rep->nonany_branch_mode); if (sort__mode == SORT_MODE__BRANCH) { bi = he->branch_info; - err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); if (err) goto out; - err = addr_map_symbol__inc_samples(&bi->to, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); } else if (rep->mem_mode) { mi = he->mem_info; - err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx); + err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel->idx); if (err) goto out; - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); } else if (symbol_conf.cumulate_callchain) { if (single) - err = hist_entry__inc_addr_samples(he, evsel->idx, + err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); } else { - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); } out: diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 022486dc67f5..e5a8f249077f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -183,6 +183,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip) static void perf_top__record_precise_ip(struct perf_top *top, struct hist_entry *he, + struct perf_sample *sample, int counter, u64 ip) { struct annotation *notes; @@ -199,7 +200,7 @@ static void perf_top__record_precise_ip(struct perf_top *top, if (pthread_mutex_trylock(¬es->lock)) return; - err = hist_entry__inc_addr_samples(he, counter, ip); + err = hist_entry__inc_addr_samples(he, sample, counter, ip); pthread_mutex_unlock(¬es->lock); @@ -671,7 +672,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, struct perf_evsel *evsel = iter->evsel; if (perf_hpp_list.sym && single) - perf_top__record_precise_ip(top, he, evsel->idx, al->addr); + perf_top__record_precise_ip(top, he, iter->sample, evsel->idx, al->addr); hist__account_cycles(iter->sample->branch_stack, al, iter->sample, !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 58c6b63ff049..c2fe16d5e473 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -697,7 +697,8 @@ static int __symbol__account_cycles(struct annotation *notes, } static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, - struct annotation *notes, int evidx, u64 addr) + struct annotation *notes, int evidx, u64 addr, + struct perf_sample *sample __maybe_unused) { unsigned offset; struct sym_hist *h; @@ -738,7 +739,8 @@ static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles } static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, - int evidx, u64 addr) + int evidx, u64 addr, + struct perf_sample *sample) { struct annotation *notes; @@ -747,7 +749,7 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, notes = symbol__get_annotation(sym, false); if (notes == NULL) return -ENOMEM; - return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); + return __symbol__inc_addr_samples(sym, map, notes, evidx, addr, sample); } static int symbol__account_cycles(u64 addr, u64 start, @@ -811,14 +813,16 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, return err; } -int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx) +int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, + int evidx) { - return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr); + return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr, sample); } -int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip) +int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample, + int evidx, u64 ip) { - return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip); + return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip, sample); } static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map *map) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index e8c246ec53df..720f18195046 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -154,13 +154,15 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) return (void *)sym - symbol_conf.priv_size; } -int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); +int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, + int evidx); int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, unsigned cycles); -int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); +int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample, + int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); From 461c17f00f400f95116880d91d20a7fcd84263a9 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 20 Jul 2017 17:18:05 -0300 Subject: [PATCH 106/253] perf annotate: Store the sample period in each histogram bucket We'll use it soon, when fixing --show-total-period. Signed-off-by: Taeung Song Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/r/1500500215-16646-1-git-send-email-treeze.taeung@gmail.com [ split from a larger patch, do the math in __symbol__inc_addr_samples() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++++++----- tools/perf/util/annotate.h | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index c2fe16d5e473..00e085fc945b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -698,7 +698,7 @@ static int __symbol__account_cycles(struct annotation *notes, static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, struct annotation *notes, int evidx, u64 addr, - struct perf_sample *sample __maybe_unused) + struct perf_sample *sample) { unsigned offset; struct sym_hist *h; @@ -716,10 +716,13 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, h = annotation__histogram(notes, evidx); h->nr_samples++; h->addr[offset].nr_samples++; + h->period += sample->period; + h->addr[offset].period += sample->period; pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64 - ", evidx=%d] => %" PRIu64 "\n", sym->start, sym->name, - addr, addr - sym->start, evidx, h->addr[offset].nr_samples); + ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n", + sym->start, sym->name, addr, addr - sym->start, evidx, + h->addr[offset].nr_samples, h->addr[offset].period); return 0; } @@ -937,7 +940,7 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, struct source_line *src_line = notes->src->lines; double percent = 0.0; - sample->nr_samples = 0; + sample->nr_samples = sample->period = 0; if (src_line) { size_t sizeof_src_line = sizeof(*src_line) + @@ -957,11 +960,15 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, } else { struct sym_hist *h = annotation__histogram(notes, evidx); unsigned int hits = 0; + u64 period = 0; - while (offset < end) + while (offset < end) { hits += h->addr[offset++].nr_samples; + period += h->addr[offset++].period; + } if (h->nr_samples) { + sample->period = period; sample->nr_samples = hits; percent = 100.0 * hits / h->nr_samples; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 720f18195046..9ce575c25fd9 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -88,6 +88,7 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, struct sym_hist { u64 nr_samples; + u64 period; struct sym_hist_entry addr[0]; }; From ecd5f9959d2c9540482977ff1208ea67fbfb8cc9 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 21 Jul 2017 11:38:48 -0300 Subject: [PATCH 107/253] perf annotate: Do not overwrite sample->period In fixing the --show-total-period option it was noticed that the value of sample->period was being overwritten, fix it. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Fixes: fd36f3dd7933 ("perf hist: Pass struct sample to __hists__add_entry()") Link: http://lkml.kernel.org/r/1500500215-16646-1-git-send-email-treeze.taeung@gmail.com [ split from a larger patch, added the Fixes tag ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 96fe1a88c1e5..7e33278eff67 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -177,7 +177,6 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, */ process_branch_stack(sample->branch_stack, al, sample); - sample->period = 1; sample->weight = 1; he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true); From 585d93c5ffccced26689e34095c0d74ef20a07d6 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 21 Jul 2017 11:58:20 -0300 Subject: [PATCH 108/253] perf annotate stdio: Fix --show-total-period MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were showing the total number of samples, not the total period as asked by the user, fix it. Reported-by: Namhyung Kim Cc: Jiri Olsa Cc: Martin Liška Cc: Milian Wolff Link: http://lkml.kernel.org/n/tip-lh2nh89rtqn5x5vbfthw6qml@git.kernel.org Fixes: 0c4a5bcea460 ("perf annotate: Display total number of samples with --show-total-period") [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 00e085fc945b..004072f82511 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1143,7 +1143,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (symbol_conf.show_total_period) color_fprintf(stdout, color, " %7" PRIu64, - sample.nr_samples); + sample.period); else color_fprintf(stdout, color, " %7.2f", percent); } From cf6383f73cf2be45d566fdb2b15b799675669de0 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 13 Jul 2017 15:02:52 +0200 Subject: [PATCH 109/253] perf report: Fix kernel symbol adjustment for s390x On s390x the kernel text segment starts at address 0x0. When perf report reads kernel symbols from vmlinux file it adds an offset of 0x1000. For example see symbol set_reset_devices: [root@s8360047 linux-devel]# nm -A vmlinux| fgrep set_reset_devices vmlinux:0000000001379000 t set_reset_devices [root@s8360047 linux-devel]# [root@s8360047 linux-devel]# fgrep set_reset_devices /proc/kallsyms 0000000001379000 t set_reset_devices [root@s8360047 linux-devel]# The kernel symbol table and the vmlinux file have the same address for symbol set_reset_devices namely 1379000. When perf report reads this symbols it displays it with address symbol__new: set_reset_devices 0x137a000-0x137a018 There is a difference between perf report and vmlinux of 0x1000. The reason for the difference is at kernel symbol load time in function dso__load_sym(). The vmlinux file is investigated with its ELF header. Command readelf shows this: Section Headers: [Nr] Name Type Address Offset Size EntSize Flags Link Info Align [ 0] NULL 0000000000000000 00000000 0000000000000000 0000000000000000 0 0 0 [ 1] .text PROGBITS 0000000000000000 00001000 0000000000b0e0c2 0000000000000000 AX 0 0 128 This leads to an invalid calculation of the symbol start address, see file utit/symbol-elf.c line 974: /* Adjust symbol to map to file offset */ if (adjust_kernel_syms) sym.st_value -= shdr.sh_addr - shdr.sh_offset; With shdr.sh_addr set to 0x0 and shdr.sh_offset set to 0x1000 as read from the ELF .text section 0x1000 is added to the symbol address. I would like to fix this by introducing an archticture specific function named elf__needs_adjust_symbols(). This is the same approach as done by PowerPC. The function currently does not exist for s390x and the default weak one is used. The s390x specific one returns false when symsrc_init() is invoked for kernel symbols and results in variable adjust_kernel_syms being false. This omits the adjustment and the correct address is displayed (when symbol resolvement does not work). The s390x specific function returns false for kernel symbol adjustment and returns true for kernel modules, processes and shared libraries. Signed-off-by: Thomas-Mich Richter Cc: Hendrik Brueckner Cc: Thomas-Mich Richter LPU-Reference: 20170713130252.6167-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/Build | 1 + tools/perf/arch/s390/util/sym-handling.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 tools/perf/arch/s390/util/sym-handling.c diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 5bd7b9260cc0..bd518b623d7a 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -1,4 +1,5 @@ libperf-y += header.o +libperf-y += sym-handling.o libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/s390/util/sym-handling.c b/tools/perf/arch/s390/util/sym-handling.c new file mode 100644 index 000000000000..b6cd056ccf71 --- /dev/null +++ b/tools/perf/arch/s390/util/sym-handling.c @@ -0,0 +1,22 @@ +/* + * Architecture specific ELF symbol handling and relocation mapping. + * + * Copyright 2017 IBM Corp. + * Author(s): Thomas Richter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ + +#include "symbol.h" + +#ifdef HAVE_LIBELF_SUPPORT +bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) +{ + if (ehdr.e_type == ET_EXEC) + return false; + return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN; +} + +#endif From cd8dd032f61abeb08d2c03bab4968a9de231a1be Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Jul 2017 20:20:19 -0300 Subject: [PATCH 110/253] perf cgroup: Fix refcount usage When converting from atomic_t to refcount_t we didn't follow the usual step of initializing it to one before taking any new reference, which trips over checking if taking a reference for a freed refcount_t, fix it. Brendan's report: --- It's 4.12-rc7, with node v4.4.1. I'm building 4.13-rc1 now, as I hit what I think is another unrelated perf bug and I'm starting to wonder what else is broken on that version: (root) /mnt/src/linux-4.12-rc7/tools/perf # ./perf record -F 99 -a -e cpu-clock --cgroup=docker/f9e9d5df065b14646e8a11edc837a13877fd90c171137b2ba3feb67a0201cb65 -g perf: /mnt/src/linux-4.12-rc7/tools/include/linux/refcount.h:108: refcount_inc: Assertion `!(!refcount_inc_not_zero(r))' failed. Aborted that used to work... --- Testing it: Before: # perf stat -e cycles -C 0 --cgroup / perf: /home/acme/git/linux/tools/include/linux/refcount.h:108: refcount_inc: Assertion `!(!refcount_inc_not_zero(r))' failed. Aborted (core dumped) # After: # perf stat -e cycles -C 0 --cgroup / ^C Performance counter stats for 'CPU(s) 0': 132,081,393 cycles / 2.492942763 seconds time elapsed # Reported-by: Brendan Gregg Acked-by: Elena Reshetova Cc: Alexander Shishkin Cc: David Ahern Cc: David Carrillo-Cisneros Cc: Kees Kook Cc: Krister Johansen Cc: Paul Turner Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Sudeep Holla Cc: Thomas-Mich Richter Cc: Wang Nan Fixes: 79c5fe6db8c7 ("perf cgroup: Convert cgroup_sel.refcnt from atomic_t to refcount_t") Link: http://lkml.kernel.org/n/tip-l7ovfblq14ip2i08m1g0fkhv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cgroup.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 03347748f3fa..0e77bc9e5f3c 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -98,8 +98,10 @@ static int add_cgroup(struct perf_evlist *evlist, char *str) cgrp = counter->cgrp; if (!cgrp) continue; - if (!strcmp(cgrp->name, str)) + if (!strcmp(cgrp->name, str)) { + refcount_inc(&cgrp->refcnt); break; + } cgrp = NULL; } @@ -110,6 +112,7 @@ static int add_cgroup(struct perf_evlist *evlist, char *str) return -1; cgrp->name = str; + refcount_set(&cgrp->refcnt, 1); cgrp->fd = open_cgroup(str); if (cgrp->fd == -1) { @@ -128,12 +131,11 @@ static int add_cgroup(struct perf_evlist *evlist, char *str) goto found; n++; } - if (refcount_read(&cgrp->refcnt) == 0) + if (refcount_dec_and_test(&cgrp->refcnt)) free(cgrp); return -1; found: - refcount_inc(&cgrp->refcnt); counter->cgrp = cgrp; return 0; } From cb281fea4b0a326d2a2104f8ffae2b6895c561fd Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 18 Jul 2017 18:18:37 -0700 Subject: [PATCH 111/253] perf tools: Add EXCLUDE_EXTLIBS and EXTRA_PERFLIBS to makefile The goal is to allow users to override linking of libraries that were automatically added to PERFLIBS. EXCLUDE_EXTLIBS contains linker flags to be removed from LIBS while EXTRA_PERFLIBS contains linker flags to be added. My use case is to force certain library to be build statically, e.g. for libelf: EXCLUDE_EXTLIBS=-lelf EXTRA_PERFLIBS=path/libelf.a Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Elena Reshetova Cc: Kees Kook Cc: Paul Turner Cc: Stephane Eranian Cc: Sudeep Holla Cc: Wang Nan Link: http://lkml.kernel.org/r/20170719011839.99399-3-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5008f51a08a2..100a6c1670c8 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -33,6 +33,11 @@ include ../scripts/utilities.mak # # Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. # +# Define EXCLUDE_EXTLIBS=-lmylib to exclude libmylib from the auto-generated +# EXTLIBS. +# +# Define EXTRA_PERFLIBS to pass extra libraries to PERFLIBS. +# # Define NO_DWARF if you do not want debug-info analysis feature at all. # # Define WERROR=0 to disable treating any warnings as errors. @@ -352,7 +357,8 @@ ifdef ASCIIDOC8 export ASCIIDOC8 endif -LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group +EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS)) +LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group ifeq ($(USE_CLANG), 1) CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema Analysis Parse Serialization From f4849599086c6462d65543637058c9b55f4803e4 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 18 Jul 2017 18:18:38 -0700 Subject: [PATCH 112/253] perf annotate: Process tracing data in pipe mode 'perf annotate' was missing the handler for tracing data records. Prior to this patch we obtained "unhandled" records when piping trace events to perf annotate (using -D option to show the dump_printf messages in process_event_synth_tracing_data_stub): $ perf record -o - -e block:bio_free sleep 2 | perf annotate -D --stdio ... 0x78 [0xc]: PERF_RECORD_TRACING_DATA: unhandled! ... Signed-off-by: David Carrillo-Cisneros Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Elena Reshetova Cc: Kees Kook Cc: Paul Turner Cc: Stephane Eranian Cc: Sudeep Holla Cc: Wang Nan Link: http://lkml.kernel.org/r/20170719011839.99399-4-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7e33278eff67..6db782dfce96 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -396,6 +396,7 @@ int cmd_annotate(int argc, const char **argv) .namespaces = perf_event__process_namespaces, .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, + .tracing_data = perf_event__process_tracing_data, .feature = perf_event__process_feature, .ordered_events = true, .ordering_requires_timestamps = true, From 5d90faf45427dd76fadbe7a4dc4fce3b6f87b550 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 18 Jul 2017 18:18:39 -0700 Subject: [PATCH 113/253] perf jvmti: Fix linker error when libelf config is disabled When libelf is disabled in the configuration, we get the following linker error: LINK libperf-jvmti.so ld: cannot find -lelf Makefile.perf:515: recipe for target 'libperf-jvmti.so' failed Jiri pointed out that both librt and libelf are not really required. So this patch fixes the linker error by getting rid of unwanted libraries in the linker stage. Signed-off-by: Sudeep Holla Acked-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Elena Reshetova Cc: Kees Kook Cc: Paul Turner Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Sudeep Holla Cc: Wang Nan Fixes: 209045adc2bb ("perf tools: add JVMTI agent library") Link: http://lkml.kernel.org/r/20170719011839.99399-5-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 100a6c1670c8..d66f90e6be5c 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -518,7 +518,7 @@ $(LIBJVMTI_IN): FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti $(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN) - $(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $< -lelf -lrt + $(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $< endif $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) From 2b04e0f88291c0dc7e12459bd3c3661d42209b4e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 21 Jul 2017 14:12:09 +0200 Subject: [PATCH 114/253] perf evsel: Add verbose output for sys_perf_event_open fallback Adding info about what is being switched off in the sys_perf_event_open fallback. New output (notice the 'switching off' lines): $ perf stat -e '{cycles,instructions}' -vvv ls Using CPUID GenuineIntel-6-3D intel_pt default config: tsc ------------------------------------------------------------ perf_event_attr: size 112 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING|ID|GROUP disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid 3591 cpu -1 group_fd -1 flags 0x8 sys_perf_event_open failed, error -22 switching off cloexec flag ------------------------------------------------------------ perf_event_attr: size 112 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING|ID|GROUP disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid 3591 cpu -1 group_fd -1 flags 0 sys_perf_event_open failed, error -22 switching off exclude_guest, exclude_host ------------------------------------------------------------ perf_event_attr: size 112 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING|ID|GROUP disabled 1 inherit 1 enable_on_exec 1 ------------------------------------------------------------ sys_perf_event_open: pid 3591 cpu -1 group_fd -1 flags 0 sys_perf_event_open failed, error -22 switching off sample_id_all ------------------------------------------------------------ perf_event_attr: size 112 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING|ID|GROUP disabled 1 inherit 1 enable_on_exec 1 ... Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170721121212.21414-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6dd069a41ac3..450b5fadf8cb 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1671,31 +1671,39 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, */ if (!perf_missing_features.write_backward && evsel->attr.write_backward) { perf_missing_features.write_backward = true; + pr_debug2("switching off write_backward\n"); goto out_close; } else if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) { perf_missing_features.clockid_wrong = true; + pr_debug2("switching off clockid\n"); goto fallback_missing_features; } else if (!perf_missing_features.clockid && evsel->attr.use_clockid) { perf_missing_features.clockid = true; + pr_debug2("switching off use_clockid\n"); goto fallback_missing_features; } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) { perf_missing_features.cloexec = true; + pr_debug2("switching off cloexec flag\n"); goto fallback_missing_features; } else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) { perf_missing_features.mmap2 = true; + pr_debug2("switching off mmap2\n"); goto fallback_missing_features; } else if (!perf_missing_features.exclude_guest && (evsel->attr.exclude_guest || evsel->attr.exclude_host)) { perf_missing_features.exclude_guest = true; + pr_debug2("switching off exclude_guest, exclude_host\n"); goto fallback_missing_features; } else if (!perf_missing_features.sample_id_all) { perf_missing_features.sample_id_all = true; + pr_debug2("switching off sample_id_all\n"); goto retry_sample_id; } else if (!perf_missing_features.lbr_flags && (evsel->attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS))) { perf_missing_features.lbr_flags = true; + pr_debug2("switching off branch sample type no (cycles/flags)\n"); goto fallback_missing_features; } out_close: From 868a832918f621b7576655c00067f20326ef3931 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 5 Jul 2017 18:48:12 -0700 Subject: [PATCH 115/253] perf top: Support lookup of symbols in other mount namespaces. The perf top command needs to unshare its fs from the helper threads in order to successfully setns(2) during its symbol lookup. It also needs to impelement a force flag to ignore ownership of perf-.map files. Signed-off-by: Krister Johansen Cc: Alexander Shishkin Cc: Brendan Gregg Cc: Peter Zijlstra Cc: Thomas-Mich Richter Link: http://lkml.kernel.org/r/1499305693-1599-6-git-send-email-kjlx@templeofstupid.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 4 ++++ tools/perf/builtin-top.c | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index e71d63843f45..d864ea6fd367 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -237,6 +237,10 @@ Default is to monitor all CPUS. --hierarchy:: Enable hierarchy output. +--force:: + Don't do ownership validation. + + INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index e5a8f249077f..ee954bde7e3e 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -587,6 +587,13 @@ static void *display_thread_tui(void *arg) .refresh = top->delay_secs, }; + /* In order to read symbols from other namespaces perf to needs to call + * setns(2). This isn't permitted if the struct_fs has multiple users. + * unshare(2) the fs so that we may continue to setns into namespaces + * that we're observing. + */ + unshare(CLONE_FS); + perf_top__sort_new_samples(top); /* @@ -628,6 +635,13 @@ static void *display_thread(void *arg) struct perf_top *top = arg; int delay_msecs, c; + /* In order to read symbols from other namespaces perf to needs to call + * setns(2). This isn't permitted if the struct_fs has multiple users. + * unshare(2) the fs so that we may continue to setns into namespaces + * that we're observing. + */ + unshare(CLONE_FS); + display_setup_sig(); pthread__unblock_sigwinch(); repeat: @@ -1206,6 +1220,7 @@ int cmd_top(int argc, const char **argv) "Show raw trace event output (do not use print fmt or plugins)"), OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, "Show entries in a hierarchy"), + OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"), OPT_END() }; const char * const top_usage[] = { From 2ec5cab604b2bbc5abe7138b537199762bea59ce Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 22 Jul 2017 10:36:10 +0300 Subject: [PATCH 116/253] perf script: Remove some bogus error handling If script_desc__new() fails then the current code has a NULL dereference. We don't actually need to do any cleanup, we can just return NULL. Signed-off-by: Dan Carpenter Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20170722073610.nnsyiwdcfl6bhn4t@mwanda Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index d430ff42208a..378f76cdf923 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2199,16 +2199,11 @@ static struct script_desc *script_desc__findnew(const char *name) s = script_desc__new(name); if (!s) - goto out_delete_desc; + return NULL; script_desc__add(s); return s; - -out_delete_desc: - script_desc__delete(s); - - return NULL; } static const char *ends_with(const char *str, const char *suffix) From e9f9a9ca8588e58dc0800b44adc41d32f6fc813a Mon Sep 17 00:00:00 2001 From: Arun Kalyanasundaram Date: Fri, 21 Jul 2017 15:04:18 -0700 Subject: [PATCH 117/253] perf script python: Allocate memory only if handler exists Avoid allocating memory if hook handler is not available. This saves unused memory allocation and simplifies error path. Let handler in python_process_tracepoint point to either tracepoint specific or trace_unhandled hook. Use dict to check if handler points to trace_unhandled. Remove the exit label in python_process_general_event and return when no handler is available. Signed-off-by: Arun Kalyanasundaram Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Daniel Borkmann Cc: David Carrillo-Cisneros Cc: David S. Miller Cc: Peter Zijlstra Cc: Seongjae Park Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170721220422.63962-2-arunkaly@google.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripting-engines/trace-event-python.c | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 57b7a00e6f16..8a8f4829d3e2 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -407,10 +407,7 @@ static void python_process_tracepoint(struct perf_sample *sample, void *data = sample->raw_data; unsigned long long nsecs = sample->time; const char *comm = thread__comm_str(al->thread); - - t = PyTuple_New(MAX_FIELDS); - if (!t) - Py_FatalError("couldn't create Python tuple"); + const char *default_handler_name = "trace_unhandled"; if (!event) { snprintf(handler_name, sizeof(handler_name), @@ -427,10 +424,19 @@ static void python_process_tracepoint(struct perf_sample *sample, handler = get_handler(handler_name); if (!handler) { + handler = get_handler(default_handler_name); + if (!handler) + return; dict = PyDict_New(); if (!dict) Py_FatalError("couldn't create Python dict"); } + + t = PyTuple_New(MAX_FIELDS); + if (!t) + Py_FatalError("couldn't create Python tuple"); + + s = nsecs / NSEC_PER_SEC; ns = nsecs - s * NSEC_PER_SEC; @@ -445,7 +451,7 @@ static void python_process_tracepoint(struct perf_sample *sample, /* ip unwinding */ callchain = python_process_callchain(sample, evsel, al); - if (handler) { + if (!dict) { PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); PyTuple_SetItem(t, n++, PyInt_FromLong(s)); PyTuple_SetItem(t, n++, PyInt_FromLong(ns)); @@ -484,23 +490,23 @@ static void python_process_tracepoint(struct perf_sample *sample, } else { /* FIELD_IS_NUMERIC */ obj = get_field_numeric_entry(event, field, data); } - if (handler) + if (!dict) PyTuple_SetItem(t, n++, obj); else pydict_set_item_string_decref(dict, field->name, obj); } - if (!handler) + if (dict) PyTuple_SetItem(t, n++, dict); if (_PyTuple_Resize(&t, n) == -1) Py_FatalError("error resizing Python tuple"); - if (handler) { + if (!dict) { call_object(handler, t, handler_name); } else { - try_call_object("trace_unhandled", t); + call_object(handler, t, default_handler_name); Py_DECREF(dict); } @@ -799,6 +805,12 @@ static void python_process_general_event(struct perf_sample *sample, static char handler_name[64]; unsigned n = 0; + snprintf(handler_name, sizeof(handler_name), "%s", "process_event"); + + handler = get_handler(handler_name); + if (!handler) + return; + /* * Use the MAX_FIELDS to make the function expandable, though * currently there is only one item for the tuple. @@ -815,12 +827,6 @@ static void python_process_general_event(struct perf_sample *sample, if (!dict_sample) Py_FatalError("couldn't create Python dictionary"); - snprintf(handler_name, sizeof(handler_name), "%s", "process_event"); - - handler = get_handler(handler_name); - if (!handler) - goto exit; - pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize( (const char *)&evsel->attr, sizeof(evsel->attr))); @@ -861,7 +867,7 @@ static void python_process_general_event(struct perf_sample *sample, Py_FatalError("error resizing Python tuple"); call_object(handler, t, handler_name); -exit: + Py_DECREF(dict); Py_DECREF(t); } From 892e76b2e8c5e85e69514478e3319575a68b9770 Mon Sep 17 00:00:00 2001 From: Arun Kalyanasundaram Date: Fri, 21 Jul 2017 15:04:19 -0700 Subject: [PATCH 118/253] perf script python: Refactor creation of perf sample dict Move the creation of the dict containing perf_sample entries into a helper function to enable its reuse in other sample processing routines. Signed-off-by: Arun Kalyanasundaram Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Daniel Borkmann Cc: David Carrillo-Cisneros Cc: David S. Miller Cc: Peter Zijlstra Cc: Seongjae Park Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170721220422.63962-3-arunkaly@google.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripting-engines/trace-event-python.c | 94 +++++++++++-------- 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 8a8f4829d3e2..69d1b6db96f6 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -391,6 +391,57 @@ static PyObject *python_process_callchain(struct perf_sample *sample, return pylist; } +static PyObject *get_perf_sample_dict(struct perf_sample *sample, + struct perf_evsel *evsel, + struct addr_location *al, + PyObject *callchain) +{ + PyObject *dict, *dict_sample; + + dict = PyDict_New(); + if (!dict) + Py_FatalError("couldn't create Python dictionary"); + + dict_sample = PyDict_New(); + if (!dict_sample) + Py_FatalError("couldn't create Python dictionary"); + + pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); + pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize( + (const char *)&evsel->attr, sizeof(evsel->attr))); + + pydict_set_item_string_decref(dict_sample, "pid", + PyInt_FromLong(sample->pid)); + pydict_set_item_string_decref(dict_sample, "tid", + PyInt_FromLong(sample->tid)); + pydict_set_item_string_decref(dict_sample, "cpu", + PyInt_FromLong(sample->cpu)); + pydict_set_item_string_decref(dict_sample, "ip", + PyLong_FromUnsignedLongLong(sample->ip)); + pydict_set_item_string_decref(dict_sample, "time", + PyLong_FromUnsignedLongLong(sample->time)); + pydict_set_item_string_decref(dict_sample, "period", + PyLong_FromUnsignedLongLong(sample->period)); + pydict_set_item_string_decref(dict, "sample", dict_sample); + + pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( + (const char *)sample->raw_data, sample->raw_size)); + pydict_set_item_string_decref(dict, "comm", + PyString_FromString(thread__comm_str(al->thread))); + if (al->map) { + pydict_set_item_string_decref(dict, "dso", + PyString_FromString(al->map->dso->name)); + } + if (al->sym) { + pydict_set_item_string_decref(dict, "symbol", + PyString_FromString(al->sym->name)); + } + + pydict_set_item_string_decref(dict, "callchain", callchain); + + return dict; +} + static void python_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) @@ -801,7 +852,7 @@ static void python_process_general_event(struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) { - PyObject *handler, *t, *dict, *callchain, *dict_sample; + PyObject *handler, *t, *dict, *callchain; static char handler_name[64]; unsigned n = 0; @@ -819,48 +870,9 @@ static void python_process_general_event(struct perf_sample *sample, if (!t) Py_FatalError("couldn't create Python tuple"); - dict = PyDict_New(); - if (!dict) - Py_FatalError("couldn't create Python dictionary"); - - dict_sample = PyDict_New(); - if (!dict_sample) - Py_FatalError("couldn't create Python dictionary"); - - pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); - pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize( - (const char *)&evsel->attr, sizeof(evsel->attr))); - - pydict_set_item_string_decref(dict_sample, "pid", - PyInt_FromLong(sample->pid)); - pydict_set_item_string_decref(dict_sample, "tid", - PyInt_FromLong(sample->tid)); - pydict_set_item_string_decref(dict_sample, "cpu", - PyInt_FromLong(sample->cpu)); - pydict_set_item_string_decref(dict_sample, "ip", - PyLong_FromUnsignedLongLong(sample->ip)); - pydict_set_item_string_decref(dict_sample, "time", - PyLong_FromUnsignedLongLong(sample->time)); - pydict_set_item_string_decref(dict_sample, "period", - PyLong_FromUnsignedLongLong(sample->period)); - pydict_set_item_string_decref(dict, "sample", dict_sample); - - pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( - (const char *)sample->raw_data, sample->raw_size)); - pydict_set_item_string_decref(dict, "comm", - PyString_FromString(thread__comm_str(al->thread))); - if (al->map) { - pydict_set_item_string_decref(dict, "dso", - PyString_FromString(al->map->dso->name)); - } - if (al->sym) { - pydict_set_item_string_decref(dict, "symbol", - PyString_FromString(al->sym->name)); - } - /* ip unwinding */ callchain = python_process_callchain(sample, evsel, al); - pydict_set_item_string_decref(dict, "callchain", callchain); + dict = get_perf_sample_dict(sample, evsel, al, callchain); PyTuple_SetItem(t, n++, dict); if (_PyTuple_Resize(&t, n) == -1) From 74ec14f3893c3065053b91cec850cffa2d565e0a Mon Sep 17 00:00:00 2001 From: Arun Kalyanasundaram Date: Fri, 21 Jul 2017 15:04:20 -0700 Subject: [PATCH 119/253] perf script python: Add sample_read to dict Provide time_enabled, time_running and counter value in the perf_sample dict. Signed-off-by: Arun Kalyanasundaram Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Daniel Borkmann Cc: David Carrillo-Cisneros Cc: David S. Miller Cc: Peter Zijlstra Cc: Seongjae Park Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170721220422.63962-4-arunkaly@google.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripting-engines/trace-event-python.c | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 69d1b6db96f6..55a45784c910 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -391,6 +391,56 @@ static PyObject *python_process_callchain(struct perf_sample *sample, return pylist; } +static PyObject *get_sample_value_as_tuple(struct sample_read_value *value) +{ + PyObject *t; + + t = PyTuple_New(2); + if (!t) + Py_FatalError("couldn't create Python tuple"); + PyTuple_SetItem(t, 0, PyLong_FromUnsignedLongLong(value->id)); + PyTuple_SetItem(t, 1, PyLong_FromUnsignedLongLong(value->value)); + return t; +} + +static void set_sample_read_in_dict(PyObject *dict_sample, + struct perf_sample *sample, + struct perf_evsel *evsel) +{ + u64 read_format = evsel->attr.read_format; + PyObject *values; + unsigned int i; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + pydict_set_item_string_decref(dict_sample, "time_enabled", + PyLong_FromUnsignedLongLong(sample->read.time_enabled)); + } + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + pydict_set_item_string_decref(dict_sample, "time_running", + PyLong_FromUnsignedLongLong(sample->read.time_running)); + } + + if (read_format & PERF_FORMAT_GROUP) + values = PyList_New(sample->read.group.nr); + else + values = PyList_New(1); + + if (!values) + Py_FatalError("couldn't create Python list"); + + if (read_format & PERF_FORMAT_GROUP) { + for (i = 0; i < sample->read.group.nr; i++) { + PyObject *t = get_sample_value_as_tuple(&sample->read.group.values[i]); + PyList_SET_ITEM(values, i, t); + } + } else { + PyObject *t = get_sample_value_as_tuple(&sample->read.one); + PyList_SET_ITEM(values, 0, t); + } + pydict_set_item_string_decref(dict_sample, "values", values); +} + static PyObject *get_perf_sample_dict(struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al, @@ -422,6 +472,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->time)); pydict_set_item_string_decref(dict_sample, "period", PyLong_FromUnsignedLongLong(sample->period)); + set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict, "sample", dict_sample); pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( From f38d281663b011d1d8a1b0119bb8357706d134a8 Mon Sep 17 00:00:00 2001 From: Arun Kalyanasundaram Date: Fri, 21 Jul 2017 15:04:21 -0700 Subject: [PATCH 120/253] perf script python: Add perf_sample dict to tracepoint handlers The process_event python hook receives a dict with all perf_sample entries, but the tracepoint specific and trace_unhandled hooks predate the introduction of this dict, and do not receive it. Add the aforementioned dict as an additional argument to the affected handlers. To keep backwards compatibility (and avoid unnecessary work), do not pass the dict if the number of arguments signals that handler version predates this change. Signed-off-by: Arun Kalyanasundaram Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Daniel Borkmann Cc: David Carrillo-Cisneros Cc: David S. Miller Cc: Peter Zijlstra Cc: Seongjae Park Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170721220422.63962-5-arunkaly@google.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripting-engines/trace-event-python.c | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 55a45784c910..938b39f6ad31 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -116,6 +116,34 @@ static PyObject *get_handler(const char *handler_name) return handler; } +static int get_argument_count(PyObject *handler) +{ + int arg_count = 0; + + /* + * The attribute for the code object is func_code in Python 2, + * whereas it is __code__ in Python 3.0+. + */ + PyObject *code_obj = PyObject_GetAttrString(handler, + "func_code"); + if (PyErr_Occurred()) { + PyErr_Clear(); + code_obj = PyObject_GetAttrString(handler, + "__code__"); + } + PyErr_Clear(); + if (code_obj) { + PyObject *arg_count_obj = PyObject_GetAttrString(code_obj, + "co_argcount"); + if (arg_count_obj) { + arg_count = (int) PyInt_AsLong(arg_count_obj); + Py_DECREF(arg_count_obj); + } + Py_DECREF(code_obj); + } + return arg_count; +} + static void call_object(PyObject *handler, PyObject *args, const char *die_msg) { PyObject *retval; @@ -499,7 +527,7 @@ static void python_process_tracepoint(struct perf_sample *sample, { struct event_format *event = evsel->tp_format; PyObject *handler, *context, *t, *obj = NULL, *callchain; - PyObject *dict = NULL; + PyObject *dict = NULL, *all_entries_dict = NULL; static char handler_name[256]; struct format_field *field; unsigned long s, ns; @@ -552,6 +580,8 @@ static void python_process_tracepoint(struct perf_sample *sample, /* ip unwinding */ callchain = python_process_callchain(sample, evsel, al); + /* Need an additional reference for the perf_sample dict */ + Py_INCREF(callchain); if (!dict) { PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); @@ -602,6 +632,14 @@ static void python_process_tracepoint(struct perf_sample *sample, if (dict) PyTuple_SetItem(t, n++, dict); + if (get_argument_count(handler) == (int) n + 1) { + all_entries_dict = get_perf_sample_dict(sample, evsel, al, + callchain); + PyTuple_SetItem(t, n++, all_entries_dict); + } else { + Py_DECREF(callchain); + } + if (_PyTuple_Resize(&t, n) == -1) Py_FatalError("error resizing Python tuple"); @@ -612,6 +650,7 @@ static void python_process_tracepoint(struct perf_sample *sample, Py_DECREF(dict); } + Py_XDECREF(all_entries_dict); Py_DECREF(t); } From a641860550f05a4b8889dca61aab73c84b2d5e16 Mon Sep 17 00:00:00 2001 From: Arun Kalyanasundaram Date: Fri, 21 Jul 2017 15:04:22 -0700 Subject: [PATCH 121/253] perf script python: Generate hooks with additional argument Modify the signature of tracepoint specific and trace_unhandled hooks to add the perf_sample dict as a new argument. Create a python helper function to print a dictionary. Signed-off-by: Arun Kalyanasundaram Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Daniel Borkmann Cc: David Carrillo-Cisneros Cc: David S. Miller Cc: Peter Zijlstra Cc: Seongjae Park Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170721220422.63962-6-arunkaly@google.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripting-engines/trace-event-python.c | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 938b39f6ad31..c7187f067d31 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1367,6 +1367,12 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "%s", f->name); } + if (not_first++) + fprintf(ofp, ", "); + if (++count % 5 == 0) + fprintf(ofp, "\n\t\t"); + fprintf(ofp, "perf_sample_dict"); + fprintf(ofp, "):\n"); fprintf(ofp, "\t\tprint_header(event_name, common_cpu, " @@ -1436,6 +1442,9 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, ")\n\n"); + fprintf(ofp, "\t\tprint 'Sample: {'+" + "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n"); + fprintf(ofp, "\t\tfor node in common_callchain:"); fprintf(ofp, "\n\t\t\tif 'sym' in node:"); fprintf(ofp, "\n\t\t\t\tprint \"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name'])"); @@ -1446,15 +1455,20 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) } fprintf(ofp, "def trace_unhandled(event_name, context, " - "event_fields_dict):\n"); + "event_fields_dict, perf_sample_dict):\n"); - fprintf(ofp, "\t\tprint ' '.join(['%%s=%%s'%%(k,str(v))" - "for k,v in sorted(event_fields_dict.items())])\n\n"); + fprintf(ofp, "\t\tprint get_dict_as_string(event_fields_dict)\n"); + fprintf(ofp, "\t\tprint 'Sample: {'+" + "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n"); fprintf(ofp, "def print_header(" "event_name, cpu, secs, nsecs, pid, comm):\n" "\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t" - "(event_name, cpu, secs, nsecs, pid, comm),\n"); + "(event_name, cpu, secs, nsecs, pid, comm),\n\n"); + + fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n" + "\treturn delimiter.join" + "(['%%s=%%s'%%(k,str(v))for k,v in sorted(a_dict.items())])\n"); fclose(ofp); From b49a821ed9e05fa0ccbaec2555052b2a920be517 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 8 May 2017 18:43:02 +0800 Subject: [PATCH 122/253] perf report: Make --branch-history work without callgraphs(-g) option in perf record MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit perf record -b -g perf report --branch-history This merges the LBRs with the callgraphs. However it would be nice if it also works without callgraphs (-g) set in perf record, so that only the LBRs are displayed. But currently perf report errors in this case. For example, perf record -b perf report --branch-history Error: Selected -g or --branch-history but no callchain data. Did you call 'perf record' without -g? This patch displays the LBRs only even if callgraphs(-g) is not enabled in perf record. Change log: v2: According to Milian Wolff's comment, change the obsolete error message. Now the error message is: ┌─Error:─────────────────────────────────────┐ │Selected -g or --branch-history. │ │But no callchain or branch data. │ │Did you call 'perf record' without -g or -b?│ │ │ │ │ │Press any key... │ └────────────────────────────────────────────┘ When passing the last parameter to hists__fprintf, changes "|" to "||". hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout, symbol_conf.use_callchain || symbol_conf.show_branchflag_count); Signed-off-by: Yao Jin Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1494240182-28899-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 12 +++++++----- tools/perf/util/callchain.c | 7 ++++--- tools/perf/util/hist.c | 2 ++ tools/perf/util/machine.c | 13 ++++++++++++- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 983b238d5eea..bace3429c030 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -279,10 +279,11 @@ static int report__setup_sample_type(struct report *rep) "'perf record' without -g?\n"); return -EINVAL; } - if (symbol_conf.use_callchain) { - ui__error("Selected -g or --branch-history but no " - "callchain data. Did\n" - "you call 'perf record' without -g?\n"); + if (symbol_conf.use_callchain && + !symbol_conf.show_branchflag_count) { + ui__error("Selected -g or --branch-history.\n" + "But no callchain or branch data.\n" + "Did you call 'perf record' without -g or -b?\n"); return -1; } } else if (!callchain_param.enabled && @@ -417,7 +418,8 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, hists__fprintf_nr_sample_events(hists, rep, evname, stdout); hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout, - symbol_conf.use_callchain); + symbol_conf.use_callchain || + symbol_conf.show_branchflag_count); fprintf(stdout, "\n\n"); } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 02130e2e72c7..1f536418dfb5 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1010,11 +1010,11 @@ int sample__resolve_callchain(struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al, int max_stack) { - if (sample->callchain == NULL) + if (sample->callchain == NULL && !symbol_conf.show_branchflag_count) return 0; if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || - perf_hpp_list.parent) { + perf_hpp_list.parent || symbol_conf.show_branchflag_count) { return thread__resolve_callchain(al->thread, cursor, evsel, sample, parent, al, max_stack); } @@ -1023,7 +1023,8 @@ int sample__resolve_callchain(struct perf_sample *sample, int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample) { - if (!symbol_conf.use_callchain || sample->callchain == NULL) + if ((!symbol_conf.use_callchain || sample->callchain == NULL) && + !symbol_conf.show_branchflag_count) return 0; return callchain_append(he->callchain, &callchain_cursor, sample->period); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 2f6c5e6c16f9..9453b2e27015 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1759,6 +1759,8 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro else use_callchain = symbol_conf.use_callchain; + use_callchain |= symbol_conf.show_branchflag_count; + output_resort(evsel__hists(evsel), prog, use_callchain, NULL); } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 79d08ea694da..d4df353051af 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1902,13 +1902,16 @@ static int thread__resolve_callchain_sample(struct thread *thread, { struct branch_stack *branch = sample->branch_stack; struct ip_callchain *chain = sample->callchain; - int chain_nr = chain->nr; + int chain_nr = 0; u8 cpumode = PERF_RECORD_MISC_USER; int i, j, err, nr_entries; int skip_idx = -1; int first_call = 0; int nr_loop_iter; + if (chain) + chain_nr = chain->nr; + if (perf_evsel__has_branch_callstack(evsel)) { err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, root_al, max_stack); @@ -1946,6 +1949,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, for (i = 0; i < nr; i++) { if (callchain_param.order == ORDER_CALLEE) { be[i] = branch->entries[i]; + + if (chain == NULL) + continue; + /* * Check for overlap into the callchain. * The return address is one off compared to @@ -2000,6 +2007,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (err) return err; } + + if (chain_nr == 0) + return 0; + chain_nr -= nr; } From a1a8bed32de197801bb861fbf13cd01496df3e05 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 24 Jul 2017 19:09:07 +0800 Subject: [PATCH 123/253] perf report: Tag branch type/flag on "to" and tag cycles on "from" Current --branch-history LBR annotation displays confused data. For example, each cycles report is duplicated on both "from" and "to" entries. For example: perf report --branch-history --no-children --stdio --2.32%--main div.c:39 (COND_BWD CROSS_2M predicted:49.7% cycles:1) main div.c:44 (predicted:49.7% cycles:1) main div.c:42 (RET CROSS_2M cycles:2) compute_flag div.c:28 (cycles:2) compute_flag div.c:27 (RET CROSS_2M cycles:1) rand rand.c:28 (cycles:1) rand rand.c:28 (RET CROSS_2M cycles:1) __random random.c:298 (cycles:1) __random random.c:297 (COND_BWD CROSS_2M cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (COND_BWD CROSS_2M cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (RET CROSS_2M cycles:9) The cycles should be tagged only on the "from". It's for the code block that ends with "from", not for "to". Another issue is the "predicted:49.7%" is duplicated too (tag on both "from" and "to"). This patch tags the branch type/flag on "to" and tag the cycles on "from". For example: --2.32%--main div.c:39 (COND_BWD CROSS_2M predicted:49.7%) main div.c:44 (cycles:1) main div.c:42 (RET CROSS_2M) compute_flag div.c:28 (cycles:2) compute_flag div.c:27 (RET CROSS_2M) rand rand.c:28 (cycles:1) rand rand.c:28 (RET CROSS_2M) __random random.c:298 (cycles:1) __random random.c:297 (COND_BWD CROSS_2M) __random random.c:295 (cycles:1) __random random.c:295 (COND_BWD CROSS_2M) __random random.c:295 (cycles:1) __random random.c:295 (RET CROSS_2M) | --2.23%--__random_r random_r.c:392 (cycles:9) In this example, The "main div.c:39 (COND_BWD CROSS_2M predicted:49.7%)" is "to" of branch and "main div.c:44 (cycles:1)" is "from" of branch. It should be easier for understanding than before. Signed-off-by: Yao Jin Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1500894547-18411-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/branch.h | 11 +-- tools/perf/util/callchain.c | 136 ++++++++++++++++++++++++++---------- 2 files changed, 105 insertions(+), 42 deletions(-) diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 686f2b65ba84..1e3c7c5cdc63 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -5,11 +5,12 @@ #include "../perf.h" struct branch_type_stat { - u64 counts[PERF_BR_MAX]; - u64 cond_fwd; - u64 cond_bwd; - u64 cross_4k; - u64 cross_2m; + bool branch_to; + u64 counts[PERF_BR_MAX]; + u64 cond_fwd; + u64 cond_bwd; + u64 cross_4k; + u64 cross_2m; }; struct branch_flags; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 1f536418dfb5..f320b0777e0d 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -563,20 +563,33 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) if (cursor_node->branch) { call->branch_count = 1; - if (cursor_node->branch_flags.predicted) - call->predicted_count = 1; + if (cursor_node->branch_from) { + /* + * branch_from is set with value somewhere else + * to imply it's "to" of a branch. + */ + call->brtype_stat.branch_to = true; - if (cursor_node->branch_flags.abort) - call->abort_count = 1; + if (cursor_node->branch_flags.predicted) + call->predicted_count = 1; - call->cycles_count = cursor_node->branch_flags.cycles; - call->iter_count = cursor_node->nr_loop_iter; - call->samples_count = cursor_node->samples; + if (cursor_node->branch_flags.abort) + call->abort_count = 1; - branch_type_count(&call->brtype_stat, - &cursor_node->branch_flags, - cursor_node->branch_from, - cursor_node->ip); + branch_type_count(&call->brtype_stat, + &cursor_node->branch_flags, + cursor_node->branch_from, + cursor_node->ip); + } else { + /* + * It's "from" of a branch + */ + call->brtype_stat.branch_to = false; + call->cycles_count = + cursor_node->branch_flags.cycles; + call->iter_count = cursor_node->nr_loop_iter; + call->samples_count = cursor_node->samples; + } } list_add_tail(&call->list, &node->val); @@ -685,20 +698,32 @@ static enum match_result match_chain(struct callchain_cursor_node *node, if (node->branch) { cnode->branch_count++; - if (node->branch_flags.predicted) - cnode->predicted_count++; + if (node->branch_from) { + /* + * It's "to" of a branch + */ + cnode->brtype_stat.branch_to = true; - if (node->branch_flags.abort) - cnode->abort_count++; + if (node->branch_flags.predicted) + cnode->predicted_count++; - cnode->cycles_count += node->branch_flags.cycles; - cnode->iter_count += node->nr_loop_iter; - cnode->samples_count += node->samples; + if (node->branch_flags.abort) + cnode->abort_count++; - branch_type_count(&cnode->brtype_stat, - &node->branch_flags, - node->branch_from, - node->ip); + branch_type_count(&cnode->brtype_stat, + &node->branch_flags, + node->branch_from, + node->ip); + } else { + /* + * It's "from" of a branch + */ + cnode->brtype_stat.branch_to = false; + cnode->cycles_count += + node->branch_flags.cycles; + cnode->iter_count += node->nr_loop_iter; + cnode->samples_count += node->samples; + } } return MATCH_EQ; @@ -1236,27 +1261,26 @@ static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int return printed; } -static int count_float_printf(int idx, const char *str, float value, char *bf, int bfsize) +static int count_float_printf(int idx, const char *str, float value, + char *bf, int bfsize, float threshold) { int printed; + if (threshold != 0.0 && value < threshold) + return 0; + printed = scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value); return printed; } -static int counts_str_build(char *bf, int bfsize, - u64 branch_count, u64 predicted_count, - u64 abort_count, u64 cycles_count, - u64 iter_count, u64 samples_count, - struct branch_type_stat *brtype_stat) +static int branch_to_str(char *bf, int bfsize, + u64 branch_count, u64 predicted_count, + u64 abort_count, + struct branch_type_stat *brtype_stat) { - u64 cycles; int printed, i = 0; - if (branch_count == 0) - return scnprintf(bf, bfsize, " (calltrace)"); - printed = branch_type_str(brtype_stat, bf, bfsize); if (printed) i++; @@ -1264,15 +1288,29 @@ static int counts_str_build(char *bf, int bfsize, if (predicted_count < branch_count) { printed += count_float_printf(i++, "predicted", predicted_count * 100.0 / branch_count, - bf + printed, bfsize - printed); + bf + printed, bfsize - printed, 0.0); } if (abort_count) { printed += count_float_printf(i++, "abort", abort_count * 100.0 / branch_count, - bf + printed, bfsize - printed); + bf + printed, bfsize - printed, 0.1); } + if (i) + printed += scnprintf(bf + printed, bfsize - printed, ")"); + + return printed; +} + +static int branch_from_str(char *bf, int bfsize, + u64 branch_count, + u64 cycles_count, u64 iter_count, + u64 samples_count) +{ + int printed = 0, i = 0; + u64 cycles; + cycles = cycles_count / branch_count; if (cycles) { printed += count_pri64_printf(i++, "cycles", @@ -1287,10 +1325,34 @@ static int counts_str_build(char *bf, int bfsize, } if (i) - return scnprintf(bf + printed, bfsize - printed, ")"); + printed += scnprintf(bf + printed, bfsize - printed, ")"); - bf[0] = 0; - return 0; + return printed; +} + +static int counts_str_build(char *bf, int bfsize, + u64 branch_count, u64 predicted_count, + u64 abort_count, u64 cycles_count, + u64 iter_count, u64 samples_count, + struct branch_type_stat *brtype_stat) +{ + int printed; + + if (branch_count == 0) + return scnprintf(bf, bfsize, " (calltrace)"); + + if (brtype_stat->branch_to) { + printed = branch_to_str(bf, bfsize, branch_count, + predicted_count, abort_count, brtype_stat); + } else { + printed = branch_from_str(bf, bfsize, branch_count, + cycles_count, iter_count, samples_count); + } + + if (!printed) + bf[0] = 0; + + return printed; } static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, From 3f056b66647bafc39f060a57289320765915972c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 24 Jul 2017 17:16:38 -0700 Subject: [PATCH 124/253] perf jevents: Make build fail on JSON parse error Today, when a JSON file fails parsing the build continues, but there are no json files built in, which is difficult to debug later. Make the build stop on a parse error instead. v2: Add fixes from Sukadev. Now we handle architectures with no JSON events correctly. And fix some stale comments. Committer note: Tested by running the cross build container tests, that were all failing for v1. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20170725001638.19990-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/README | 4 ---- tools/perf/pmu-events/jevents.c | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README index 1408ade0d773..c2ee3e4417fe 100644 --- a/tools/perf/pmu-events/README +++ b/tools/perf/pmu-events/README @@ -85,10 +85,6 @@ users to specify events by their name: where 'pm_1plus_ppc_cmpl' is a Power8 PMU event. -In case of errors when processing files in the tools/perf/pmu-events/arch -directory, 'jevents' tries to create an empty mapping file to allow the perf -build to succeed even if the PMU event aliases cannot be used. - However some errors in processing may cause the perf build to fail. Mapfile format diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index bd0aabb2bd0f..2350f6099a46 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -822,10 +822,6 @@ static int process_one_file(const char *fpath, const struct stat *sb, * PMU event tables (see struct pmu_events_map). * * Write out the PMU events tables and the mapping table to pmu-event.c. - * - * If unable to process the JSON or arch files, create an empty mapping - * table so we can continue to build/use perf even if we cannot use the - * PMU event aliases. */ int main(int argc, char *argv[]) { @@ -836,6 +832,7 @@ int main(int argc, char *argv[]) const char *arch; const char *output_file; const char *start_dirname; + struct stat stbuf; prog = basename(argv[0]); if (argc < 4) { @@ -857,11 +854,17 @@ int main(int argc, char *argv[]) return 2; } + sprintf(ldirname, "%s/%s", start_dirname, arch); + + /* If architecture does not have any event lists, bail out */ + if (stat(ldirname, &stbuf) < 0) { + pr_info("%s: Arch %s has no PMU event lists\n", prog, arch); + goto empty_map; + } + /* Include pmu-events.h first */ fprintf(eventsfp, "#include \"../../pmu-events/pmu-events.h\"\n"); - sprintf(ldirname, "%s/%s", start_dirname, arch); - /* * The mapfile allows multiple CPUids to point to the same JSON file, * so, not sure if there is a need for symlinks within the pmu-events @@ -878,6 +881,9 @@ int main(int argc, char *argv[]) if (rc && verbose) { pr_info("%s: Error walking file tree %s\n", prog, ldirname); goto empty_map; + } else if (rc < 0) { + /* Make build fail */ + return 1; } else if (rc) { goto empty_map; } @@ -892,7 +898,8 @@ int main(int argc, char *argv[]) if (process_mapfile(eventsfp, mapfile)) { pr_info("%s: Error processing mapfile %s\n", prog, mapfile); - goto empty_map; + /* Make build fail */ + return 1; } return 0; From 38d2dcd0ccf85b55d783edbfc14fd8dea4d55b73 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Tue, 25 Jul 2017 06:28:42 +0900 Subject: [PATCH 125/253] perf annotate stdio: Fix column header when using --show-total-period Currently the first column header is always "Percent", fix it to show correct column name based on given options, i.e. if using --show-total-period, show "Event count" as a first column. Reported-by: Milian Wolff Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/c3c902e7-95bc-16d4-366f-12eb034c5c8d@gmail.com [ Extracted from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 004072f82511..c2b4b00166ed 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1824,7 +1824,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, width *= evsel->nr_members; graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n", - width, width, "Percent", d_filename, evsel_name, h->nr_samples); + width, width, symbol_conf.show_total_period ? "Event count" : "Percent", + d_filename, evsel_name, h->nr_samples); printf("%-*.*s----\n", graph_dotted_len, graph_dotted_len, graph_dotted_line); From 62e6039f02888efdd824e8c596c4927616a97ce3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 25 Jul 2017 14:14:27 -0300 Subject: [PATCH 126/253] perf tools: Add tools/include/uapi/asm-generic/fcntl.h to the MANIFEST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This file was copied from the kernel so that we could build tools/perf/ on older systems where some newer defines, such as these are available: CC trace/beauty/fcntl.o trace/beauty/fcntl.c: In function ‘syscall_arg__scnprintf_fcntl_arg’: trace/beauty/fcntl.c:93:13: error: ‘F_OFD_SETLK’ undeclared (first use in this function) cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW || cmd == F_OFD_GETLK || ^ trace/beauty/fcntl.c:93:13: note: each undeclared identifier is reported only once for each function it appears in trace/beauty/fcntl.c:93:35: error: ‘F_OFD_SETLKW’ undeclared (first use in this function) cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW || cmd == F_OFD_GETLK || ^ trace/beauty/fcntl.c:93:58: error: ‘F_OFD_GETLK’ undeclared (first use in this function) cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW || cmd == F_OFD_GETLK || ^ mv: cannot stat ‘trace/beauty/.fcntl.o.tmp’: No such file or directory make[4]: *** [trace/beauty/fcntl.o] Error 1 make[3]: *** [trace/beauty] Error 2 make[3]: *** Waiting for unfinished jobs.... CC tests/llvm.o But we need to make sure that it is also in the tools/perf/MANIFEST file, that is used to build a tarball for detached (from the kernel sources) compilation, which was failing, with the above message, on a RHEL7.4 system, fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 84d1d8a12df3 ("tools include uapi asm-generic: Grab a copy of fcntl.h") Link: http://lkml.kernel.org/n/tip-2d5px7aq5stbwi24pgirwtlm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/MANIFEST | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 9f2267b82eb2..705bdb147e73 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -70,6 +70,7 @@ tools/include/linux/hash.h tools/include/linux/kernel.h tools/include/linux/list.h tools/include/linux/log2.h +tools/include/uapi/asm-generic/fcntl.h tools/include/uapi/asm-generic/mman-common.h tools/include/uapi/asm-generic/mman.h tools/include/uapi/linux/bpf.h From de63403bfd14ae8d613f30c9a0d415581b4cb37e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Jul 2017 14:02:04 +0200 Subject: [PATCH 127/253] perf tools: Add perf_evsel__read_size function Currently we use the size of struct perf_counts_values to read the event, which prevents us to put any new member to the struct. Adding perf_evsel__read_size to return size of the buffer needed for event read. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170726120206.9099-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 450b5fadf8cb..4dd0fcc06db9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1261,15 +1261,42 @@ void perf_counts_values__scale(struct perf_counts_values *count, *pscaled = scaled; } +static int perf_evsel__read_size(struct perf_evsel *evsel) +{ + u64 read_format = evsel->attr.read_format; + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (read_format & PERF_FORMAT_GROUP) { + nr = evsel->nr_members; + size += sizeof(u64); + } + + size += entry * nr; + return size; +} + int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { + size_t size = perf_evsel__read_size(evsel); + memset(count, 0, sizeof(*count)); if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0) + if (readn(FD(evsel, cpu, thread), count->values, size) <= 0) return -errno; return 0; From f7794d525447f1e4e4b2228dd29dba084005e6bf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Jul 2017 14:02:05 +0200 Subject: [PATCH 128/253] perf evsel: Add read_counter() Add perf_evsel__read_counter() to read single or group counter. After calling this function the counter's evsel::counts struct is filled with values for the counter and member of its group if there are any. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170726120206.9099-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 100 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 2 + tools/perf/util/stat.c | 4 ++ tools/perf/util/stat.h | 5 +- 4 files changed, 109 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4dd0fcc06db9..89aecf3a35c7 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1302,6 +1302,106 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, return 0; } +static int +perf_evsel__read_one(struct perf_evsel *evsel, int cpu, int thread) +{ + struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread); + + return perf_evsel__read(evsel, cpu, thread, count); +} + +static void +perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread, + u64 val, u64 ena, u64 run) +{ + struct perf_counts_values *count; + + count = perf_counts(counter->counts, cpu, thread); + + count->val = val; + count->ena = ena; + count->run = run; +} + +static int +perf_evsel__process_group_data(struct perf_evsel *leader, + int cpu, int thread, u64 *data) +{ + u64 read_format = leader->attr.read_format; + struct sample_read_value *v; + u64 nr, ena = 0, run = 0, i; + + nr = *data++; + + if (nr != (u64) leader->nr_members) + return -EINVAL; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + ena = *data++; + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + run = *data++; + + v = (struct sample_read_value *) data; + + perf_evsel__set_count(leader, cpu, thread, + v[0].value, ena, run); + + for (i = 1; i < nr; i++) { + struct perf_evsel *counter; + + counter = perf_evlist__id2evsel(leader->evlist, v[i].id); + if (!counter) + return -EINVAL; + + perf_evsel__set_count(counter, cpu, thread, + v[i].value, ena, run); + } + + return 0; +} + +static int +perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread) +{ + struct perf_stat_evsel *ps = leader->priv; + u64 read_format = leader->attr.read_format; + int size = perf_evsel__read_size(leader); + u64 *data = ps->group_data; + + if (!(read_format & PERF_FORMAT_ID)) + return -EINVAL; + + if (!perf_evsel__is_group_leader(leader)) + return -EINVAL; + + if (!data) { + data = zalloc(size); + if (!data) + return -ENOMEM; + + ps->group_data = data; + } + + if (FD(leader, cpu, thread) < 0) + return -EINVAL; + + if (readn(FD(leader, cpu, thread), data, size) <= 0) + return -errno; + + return perf_evsel__process_group_data(leader, cpu, thread, data); +} + +int perf_evsel__read_counter(struct perf_evsel *evsel, int cpu, int thread) +{ + u64 read_format = evsel->attr.read_format; + + if (read_format & PERF_FORMAT_GROUP) + return perf_evsel__read_group(evsel, cpu, thread); + else + return perf_evsel__read_one(evsel, cpu, thread); +} + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index fb40ca3c6519..de03c18daaf0 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -299,6 +299,8 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1, int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); +int perf_evsel__read_counter(struct perf_evsel *evsel, int cpu, int thread); + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 53b9a994a3dc..35e9848734d6 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -128,6 +128,10 @@ static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) { + struct perf_stat_evsel *ps = evsel->priv; + + if (ps) + free(ps->group_data); zfree(&evsel->priv); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 7522bf10b03e..eacaf958e19d 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -28,8 +28,9 @@ enum perf_stat_evsel_id { }; struct perf_stat_evsel { - struct stats res_stats[3]; - enum perf_stat_evsel_id id; + struct stats res_stats[3]; + enum perf_stat_evsel_id id; + u64 *group_data; }; enum aggr_mode { From 82bf311e15d22e2fa45423b1fb4a21cf925381fe Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Jul 2017 14:02:06 +0200 Subject: [PATCH 129/253] perf stat: Use group read for event groups Make perf stat use group read if there are groups defined. The group read will get the values for all member of groups within a single syscall instead of calling read syscall for every event. We can see considerable less amount of kernel cycles spent on single group read, than reading each event separately, like for following perf stat command: # perf stat -e {cycles,instructions} -I 10 -a sleep 1 Monitored with "perf stat -r 5 -e '{cycles:u,cycles:k}'" Before: 24,325,676 cycles:u 297,040,775 cycles:k 1.038554134 seconds time elapsed After: 25,034,418 cycles:u 158,256,395 cycles:k 1.036864497 seconds time elapsed The perf_evsel__open fallback changes contributed by Andi Kleen. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170726120206.9099-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 30 +++++++++++++++++++++++++++--- tools/perf/util/counts.h | 1 + tools/perf/util/evsel.c | 10 ++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 48ac53b199fc..866da7aa54bf 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -213,10 +213,20 @@ static void perf_stat__reset_stats(void) static int create_perf_stat_counter(struct perf_evsel *evsel) { struct perf_event_attr *attr = &evsel->attr; + struct perf_evsel *leader = evsel->leader; - if (stat_config.scale) + if (stat_config.scale) { attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; + } + + /* + * The event is part of non trivial group, let's enable + * the group read (for leader) and ID retrieval for all + * members. + */ + if (leader->nr_members > 1) + attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; attr->inherit = !no_inherit; @@ -333,13 +343,21 @@ static int read_counter(struct perf_evsel *counter) struct perf_counts_values *count; count = perf_counts(counter->counts, cpu, thread); - if (perf_evsel__read(counter, cpu, thread, count)) { + + /* + * The leader's group read loads data into its group members + * (via perf_evsel__read_counter) and sets threir count->loaded. + */ + if (!count->loaded && + perf_evsel__read_counter(counter, cpu, thread)) { counter->counts->scaled = -1; perf_counts(counter->counts, cpu, thread)->ena = 0; perf_counts(counter->counts, cpu, thread)->run = 0; return -1; } + count->loaded = false; + if (STAT_RECORD) { if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { pr_err("failed to write stat event\n"); @@ -559,6 +577,11 @@ static int store_counter_ids(struct perf_evsel *counter) return __store_counter_ids(counter, cpus, threads); } +static bool perf_evsel__should_store_id(struct perf_evsel *counter) +{ + return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; +} + static int __run_perf_stat(int argc, const char **argv) { int interval = stat_config.interval; @@ -631,7 +654,8 @@ static int __run_perf_stat(int argc, const char **argv) if (l > unit_width) unit_width = l; - if (STAT_RECORD && store_counter_ids(counter)) + if (perf_evsel__should_store_id(counter) && + store_counter_ids(counter)) return -1; } diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 34d8baaf558a..cb45a6aecf9d 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -12,6 +12,7 @@ struct perf_counts_values { }; u64 values[3]; }; + bool loaded; }; struct perf_counts { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 89aecf3a35c7..3735c9e0080d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -49,6 +49,7 @@ static struct { bool clockid_wrong; bool lbr_flags; bool write_backward; + bool group_read; } perf_missing_features; static clockid_t clockid; @@ -1321,6 +1322,7 @@ perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread, count->val = val; count->ena = ena; count->run = run; + count->loaded = true; } static int @@ -1677,6 +1679,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, if (perf_missing_features.lbr_flags) evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS | PERF_SAMPLE_BRANCH_NO_CYCLES); + if (perf_missing_features.group_read && evsel->attr.inherit) + evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID); retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; @@ -1832,6 +1836,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, perf_missing_features.lbr_flags = true; pr_debug2("switching off branch sample type no (cycles/flags)\n"); goto fallback_missing_features; + } else if (!perf_missing_features.group_read && + evsel->attr.inherit && + (evsel->attr.read_format & PERF_FORMAT_GROUP)) { + perf_missing_features.group_read = true; + pr_debug2("switching off group read\n"); + goto fallback_missing_features; } out_close: do { From c6c13be76c1fc8a3169dbd29cd1d42af1d64773f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Jul 2017 16:52:25 -0300 Subject: [PATCH 130/253] perf annotate: Do not overwrite perf_sample->weight When we parse an event we may get a value from the kernel in response to PERF_SAMPLE_WEIGHT being set in perf_event_attr->sample_type, and if it is not set, then perf_sample->weight will be set to zero, which should be ok according to a discussion with Andi Kleen [1]: 1: https://lkml.kernel.org/r/20170724174637.GS3044@two.firstfloor.org Acked-by: Andi Kleen Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-8ev8ufk3lzmvgz37yg9nv3qz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6db782dfce96..658c920d74b9 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -177,8 +177,6 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, */ process_branch_stack(sample->branch_stack, al, sample); - sample->weight = 1; - he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true); if (he == NULL) return -ENOMEM; From 64831a21db13495008665b33878e31b48c1e3dc7 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Thu, 20 Jul 2017 22:11:57 -0700 Subject: [PATCH 131/253] perf sort: Use default sort if evlist is empty Fixes bug noted by Jiri in https://lkml.org/lkml/2017/6/13/755 and caused by commit d49dadea7862 ("perf tools: Make 'trace' or 'trace_fields' sort key default for tracepoint events") not taking into account that evlist is empty in pipe-mode. Before this commit, pipe mode will only show bogus "100.00% N/A" instead of correct output as follows: $ perf record -o - sleep 1 | perf report -i - # To display the perf.data header info, please use --header/--header-only options. # [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] # # Total Lost Samples: 0 # # Samples: 8 of event 'cycles:ppH' # Event count (approx.): 145658 # # Overhead Trace output # ........ ............ # 100.00% N/A Correct output, after patch: $ perf record -o - sleep 1 | perf report -i - # To display the perf.data header info, please use --header/--header-only options. # [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] # # Total Lost Samples: 0 # # Samples: 8 of event 'cycles:ppH' # Event count (approx.): 191331 # # Overhead Command Shared Object Symbol # ........ ....... ................. ................................. # 81.63% sleep libc-2.19.so [.] _exit 13.58% sleep ld-2.19.so [.] do_lookup_x 2.34% sleep [kernel.kallsyms] [k] context_switch 2.34% sleep libc-2.19.so [.] __GI___libc_nanosleep 0.11% perf [kernel.kallsyms] [k] __intel_pmu_enable_a Reported-by: Jiri Olsa Report-Link: https://lkml.kernel.org/r/20170613185422.GA6092@krava Signed-off-by: David Carrillo-Cisneros Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Paul Turner Cc: Simon Que Cc: Stephane Eranian Cc: Wang Nan Fixes: d49dadea7862 ("perf tools: Make 'trace' or 'trace_fields' sort key default for tracepoint events") Link: https://lkml.kernel.org/r/20170721051157.47331-1-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.h | 5 +++++ tools/perf/util/sort.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 0843746bc389..bf2c4936e35f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -265,6 +265,11 @@ bool perf_evlist__valid_read_format(struct perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list); +static inline bool perf_evlist__empty(struct perf_evlist *evlist) +{ + return list_empty(&evlist->entries); +} + static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) { return list_entry(evlist->entries.next, struct perf_evsel, node); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 8b327c955a4f..12359bd986db 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2563,7 +2563,7 @@ static const char *get_default_sort_order(struct perf_evlist *evlist) BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders)); - if (evlist == NULL) + if (evlist == NULL || perf_evlist__empty(evlist)) goto out_no_evlist; evlist__for_each_entry(evlist, evsel) { From ce9ee4a2de20062a97ad50ecc11ebda7e7618fd1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 26 Jul 2017 17:16:46 -0300 Subject: [PATCH 132/253] perf annotate stdio: Set enough columns for --show-total-period Now that we set the first column header according to wether --show-total-period is being used, we need to size it accordingly. Based-on-a-patch-by: Taeung Song Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-pu504ffnit4m334k09hxcbs3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index c2b4b00166ed..5125c2bbacaa 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1142,7 +1142,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st color = get_percent_color(percent); if (symbol_conf.show_total_period) - color_fprintf(stdout, color, " %7" PRIu64, + color_fprintf(stdout, color, " %11" PRIu64, sample.period); else color_fprintf(stdout, color, " %7.2f", percent); @@ -1165,7 +1165,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st } else if (max_lines && printed >= max_lines) return 1; else { - int width = 8; + int width = symbol_conf.show_total_period ? 12 : 8; if (queue) return -1; @@ -1806,7 +1806,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int printed = 2, queue_len = 0; int more = 0; u64 len; - int width = 8; + int width = symbol_conf.show_total_period ? 12 : 8; int graph_dotted_len; filename = strdup(dso->long_name); From 48cc33085253d607706e68a67ac98fe2a6abdd3d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 28 Jul 2017 12:49:02 -0300 Subject: [PATCH 133/253] perf annotate: Fix storing per line sym_hist_entry The existing loop incremented the offset while using it as the array index, when we went to an array of sym_hist_entry instances, we should've moved the increment to outside of the array element reference, oops, fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Taeung Song Cc: Wang Nan Fixes: 461c17f00f40 ("perf annotate: Store the sample period in each histogram bucket") Link: http://lkml.kernel.org/n/tip-s3dm6uyrazlpag3f0psfia07@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5125c2bbacaa..2dab0e5a7f2f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -963,8 +963,9 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, u64 period = 0; while (offset < end) { - hits += h->addr[offset++].nr_samples; - period += h->addr[offset++].period; + hits += h->addr[offset].nr_samples; + period += h->addr[offset].period; + ++offset; } if (h->nr_samples) { From bb79a232b0881ce47f921a53f40612a9f9996482 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Jul 2017 11:24:59 -0300 Subject: [PATCH 134/253] perf annotate TUI: Use sym_hist_entry in disasm_line_samples Just paving the way to fix --show-total-period in the TUI, i.e. now we save in struct disasm_line_samples not just the number of samples, but also the total period. Based-on-a-patch-by: Taeung Song Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-1sup5hkwrxocjvrmrmhs732o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index dbe4e630b90f..680fff70f7a0 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -17,8 +17,8 @@ #include struct disasm_line_samples { - double percent; - u64 nr; + double percent; + struct sym_hist_entry he; }; #define IPC_WIDTH 6 @@ -152,7 +152,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int current_entry); if (annotate_browser__opts.show_total_period) { ui_browser__printf(browser, "%6" PRIu64 " ", - bdl->samples[i].nr); + bdl->samples[i].he.nr_samples); } else { ui_browser__printf(browser, "%6.2f ", bdl->samples[i].percent); @@ -457,7 +457,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, pos->offset, next ? next->offset : len, &path, &sample); - bpos->samples[i].nr = sample.nr_samples; + bpos->samples[i].he = sample; if (max_percent < bpos->samples[i].percent) max_percent = bpos->samples[i].percent; From 29dc267f270a4ad5ae1341e7fdc8539ac7dc907a Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 27 Jul 2017 11:33:20 -0300 Subject: [PATCH 135/253] perf annotate TUI: Fix --show-total-period MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were showing the number of samples, not the total period, fix it. Reported-by: Namhyung Kim Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Martin Liška Cc: Milian Wolff Cc: Jiri Olsa Fixes: 0c4a5bcea460 ("perf annotate: Display total number of samples with --show-total-period") Link: http://lkml.kernel.org/r/1500500223-16753-1-git-send-email-treeze.taeung@gmail.com [ extracted from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 680fff70f7a0..c382b1d8af42 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -152,7 +152,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int current_entry); if (annotate_browser__opts.show_total_period) { ui_browser__printf(browser, "%6" PRIu64 " ", - bdl->samples[i].he.nr_samples); + bdl->samples[i].he.period); } else { ui_browser__printf(browser, "%6.2f ", bdl->samples[i].percent); From bc1e5d60cebb711ca3783a87a969d18db376d357 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 27 Jul 2017 12:05:58 -0300 Subject: [PATCH 136/253] perf annotate TUI: Clarify calculation of column header widths In commit f8f4aaead579 ("perf annotate: Finally display IPC and cycle accounting") the 'pcnt_width' variable was abused in a few places to also include the optional width of the "IPC" and "cycles" columns, while in other places we stopped using 'pcnt_width' and instead its previous equation... Now that we need to tap into annotate_browser__pcnt_width() to consider if --show-total-period is being used and instead of that hardcoded 7 (strlen("Percent")) we need to use it or strlen("Event count") we need this properly clarified to avoid having to touch all the (7 * nr_events) places. Clarify this by introducing a separate annotate_browser__cycles_width() to leave the pcnt_width calculate just what its name implies. Cc: Taeung Song Cc: Milian Wolff Cc: Namhyung Kim Cc: Jiri Olsa Cc: Andi Kleen Link: http://lkml.kernel.org/n/tip-szgb07t4k5wtvks8nzwkg710@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index c382b1d8af42..0f4bcc0d140c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -110,11 +110,12 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br static int annotate_browser__pcnt_width(struct annotate_browser *ab) { - int w = 7 * ab->nr_events; + return 7 * ab->nr_events; +} - if (ab->have_cycles) - w += IPC_WIDTH + CYCLES_WIDTH; - return w; +static int annotate_browser__cycles_width(struct annotate_browser *ab) +{ + return ab->have_cycles ? IPC_WIDTH + CYCLES_WIDTH : 0; } static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) @@ -127,7 +128,8 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int (!current_entry || (browser->use_navkeypressed && !browser->navkeypressed))); int width = browser->width, printed; - int i, pcnt_width = annotate_browser__pcnt_width(ab); + int i, pcnt_width = annotate_browser__pcnt_width(ab), + cycles_width = annotate_browser__cycles_width(ab); double percent_max = 0.0; char bf[256]; bool show_title = false; @@ -162,7 +164,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__set_percent_color(browser, 0, current_entry); if (!show_title) - ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); + ui_browser__write_nstring(browser, " ", pcnt_width); else ui_browser__printf(browser, "%*s", 7, "Percent"); } @@ -190,7 +192,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int width += 1; if (!*dl->line) - ui_browser__write_nstring(browser, " ", width - pcnt_width); + ui_browser__write_nstring(browser, " ", width - pcnt_width - cycles_width); else if (dl->offset == -1) { if (dl->line_nr && annotate_browser__opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", @@ -199,7 +201,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int printed = scnprintf(bf, sizeof(bf), "%*s ", ab->addr_width, " "); ui_browser__write_nstring(browser, bf, printed); - ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width + 1); + ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width - cycles_width + 1); } else { u64 addr = dl->offset; int color = -1; @@ -256,7 +258,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); - ui_browser__write_nstring(browser, bf, width - pcnt_width - 3 - printed); + ui_browser__write_nstring(browser, bf, width - pcnt_width - cycles_width - 3 - printed); } if (current_entry) From f67d395c6e3895c3c8c67c8f7523f6a94d61a82d Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 28 Jul 2017 12:04:32 -0300 Subject: [PATCH 137/253] perf annotate TUI: Fix column header when toggling period/percent We have the 't' hotkey to toggle showing either the total period or the percentage of samples for a given line, but we forgot to toggle as well the column header, always showing "Percent", even when showing the period, fix it. Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/r/1501172169-6761-1-git-send-email-treeze.taeung@gmail.com [ Extracted from a larger patch, s/Event count/Period/g ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 0f4bcc0d140c..46f297a4e94b 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -166,7 +166,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!show_title) ui_browser__write_nstring(browser, " ", pcnt_width); else - ui_browser__printf(browser, "%*s", 7, "Percent"); + ui_browser__printf(browser, "%*s", 7, annotate_browser__opts.show_total_period ? "Period" : "Percent"); } if (ab->have_cycles) { if (dl->ipc) From 3861c4a49bea432c57d6e7cbd89c8b71ed4445b7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 28 Jul 2017 13:19:32 -0300 Subject: [PATCH 138/253] perf annotate TUI: Set appropriate column width for period/percent Either when we start 'perf annotate' or 'perf report' with --show-total-period or when we, in the annotate browser, press 't' to toggle period/percent for the first column, we need to adjust the width for the 'period' case. Based-on-a-patch-by: Taeung Song Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-n2np5qcs20u6qjdr9orygne6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 46f297a4e94b..80f38dab9c3a 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -110,7 +110,7 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br static int annotate_browser__pcnt_width(struct annotate_browser *ab) { - return 7 * ab->nr_events; + return (annotate_browser__opts.show_total_period ? 12 : 7) * ab->nr_events; } static int annotate_browser__cycles_width(struct annotate_browser *ab) @@ -153,7 +153,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int bdl->samples[i].percent, current_entry); if (annotate_browser__opts.show_total_period) { - ui_browser__printf(browser, "%6" PRIu64 " ", + ui_browser__printf(browser, "%11" PRIu64 " ", bdl->samples[i].he.period); } else { ui_browser__printf(browser, "%6.2f ", @@ -165,8 +165,10 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!show_title) ui_browser__write_nstring(browser, " ", pcnt_width); - else - ui_browser__printf(browser, "%*s", 7, annotate_browser__opts.show_total_period ? "Period" : "Percent"); + else { + ui_browser__printf(browser, "%*s", pcnt_width, + annotate_browser__opts.show_total_period ? "Period" : "Percent"); + } } if (ab->have_cycles) { if (dl->ipc) From a3073c8e590d7baa5a6cb01438cb945c92bfcd91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Genevi=C3=A8ve=20Bastien?= Date: Thu, 27 Jul 2017 14:12:03 -0400 Subject: [PATCH 139/253] perf data: Add callchain to CTF conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The field perf_callchain, if available, is added to the sampling events during the CTF conversion. It is an array of u64 values. The perf_callchain_size field contains the size of the array. It will allow the analysis of sampling data in trace visualization tools like Trace Compass. Possible analyses with those data: dynamic flamegraphs, correlation with other tracing data like a userspace trace. Here follows a babeltrace CTF output of a trace with callchain: $ babeltrace ./myctftrace [17:38:45.672760285] (+?.?????????) cycles:ppp: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF81063EE4, perf_tid = 25841, perf_pid = 25774, perf_period = 1, perf_callchain_size = 7, perf_callchain = [ [0] = 0xFFFFFFFFFFFFFF80, [1] = 0xFFFFFFFF81063EE4, [2] = 0xFFFFFFFF8100C770, [3] = 0xFFFFFFFF81006EC6, [4] = 0xFFFFFFFF8118245E, [5] = 0xFFFFFFFF810A9224, [6] = 0xFFFFFFFF8164A4C6 ] } [17:38:45.672777672] (+0.000017387) cycles:ppp: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF81063EE4, perf_tid = 25841, perf_pid = 25774, perf_period = 1, perf_callchain_size = 8, perf_callchain = [ [0] = 0xFFFFFFFFFFFFFF80, [1] = 0xFFFFFFFF81063EE4, [2] = 0xFFFFFFFF8100C770, [3] = 0xFFFFFFFF81006EC6, [4] = 0xFFFFFFFF8118245E, [5] = 0xFFFFFFFF810A9224, [6] = 0xFFFFFFFF8164A4C6, [7] = 0xFFFFFFFF8164ABAD ] } [17:38:45.672786700] (+0.000009028) cycles:ppp: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF81063EE4, perf_tid = 25841, perf_pid = 25774, perf_period = 70, perf_callchain_size = 3, perf_callchain = [ [0] = 0xFFFFFFFFFFFFFF80, [1] = 0xFFFFFFFF81063EE4, [2] = 0xFFFFFFFF8100C770 ] } Signed-off-by: Geneviève Bastien Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Francis Deslauriers Cc: Julien Desfossez Cc: Mathieu Desnoyers Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170727181205.24843-1-gbastien@versatic.net [ Removed PERF_SAMPLE_CALLCHAIN from the TODO list, jolsa ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-bt.c | 92 ++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 3149b70799fd..eeb2590a3ddf 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -506,6 +506,81 @@ add_bpf_output_values(struct bt_ctf_event_class *event_class, return ret; } +static int +add_callchain_output_values(struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct ip_callchain *callchain) +{ + struct bt_ctf_field_type *len_type, *seq_type; + struct bt_ctf_field *len_field, *seq_field; + unsigned int nr_elements = callchain->nr; + unsigned int i; + int ret; + + len_type = bt_ctf_event_class_get_field_by_name( + event_class, "perf_callchain_size"); + len_field = bt_ctf_field_create(len_type); + if (!len_field) { + pr_err("failed to create 'perf_callchain_size' for callchain output event\n"); + ret = -1; + goto put_len_type; + } + + ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements); + if (ret) { + pr_err("failed to set field value for perf_callchain_size\n"); + goto put_len_field; + } + ret = bt_ctf_event_set_payload(event, "perf_callchain_size", len_field); + if (ret) { + pr_err("failed to set payload to perf_callchain_size\n"); + goto put_len_field; + } + + seq_type = bt_ctf_event_class_get_field_by_name( + event_class, "perf_callchain"); + seq_field = bt_ctf_field_create(seq_type); + if (!seq_field) { + pr_err("failed to create 'perf_callchain' for callchain output event\n"); + ret = -1; + goto put_seq_type; + } + + ret = bt_ctf_field_sequence_set_length(seq_field, len_field); + if (ret) { + pr_err("failed to set length of 'perf_callchain'\n"); + goto put_seq_field; + } + + for (i = 0; i < nr_elements; i++) { + struct bt_ctf_field *elem_field = + bt_ctf_field_sequence_get_field(seq_field, i); + + ret = bt_ctf_field_unsigned_integer_set_value(elem_field, + ((u64 *)(callchain->ips))[i]); + + bt_ctf_field_put(elem_field); + if (ret) { + pr_err("failed to set callchain[%d]\n", i); + goto put_seq_field; + } + } + + ret = bt_ctf_event_set_payload(event, "perf_callchain", seq_field); + if (ret) + pr_err("failed to set payload for raw_data\n"); + +put_seq_field: + bt_ctf_field_put(seq_field); +put_seq_type: + bt_ctf_field_type_put(seq_type); +put_len_field: + bt_ctf_field_put(len_field); +put_len_type: + bt_ctf_field_type_put(len_type); + return ret; +} + static int add_generic_values(struct ctf_writer *cw, struct bt_ctf_event *event, struct perf_evsel *evsel, @@ -519,7 +594,6 @@ static int add_generic_values(struct ctf_writer *cw, * PERF_SAMPLE_TIME - not needed as we have it in * ctf event header * PERF_SAMPLE_READ - TODO - * PERF_SAMPLE_CALLCHAIN - TODO * PERF_SAMPLE_RAW - tracepoint fields are handled separately * PERF_SAMPLE_BRANCH_STACK - TODO * PERF_SAMPLE_REGS_USER - TODO @@ -720,6 +794,7 @@ static int process_sample_event(struct perf_tool *tool, struct bt_ctf_event_class *event_class; struct bt_ctf_event *event; int ret; + unsigned long type = evsel->attr.sample_type; if (WARN_ONCE(!priv, "Failed to setup all events.\n")) return 0; @@ -751,6 +826,13 @@ static int process_sample_event(struct perf_tool *tool, return -1; } + if (type & PERF_SAMPLE_CALLCHAIN) { + ret = add_callchain_output_values(event_class, + event, sample->callchain); + if (ret) + return -1; + } + if (perf_evsel__is_bpf_output(evsel)) { ret = add_bpf_output_values(event_class, event, sample); if (ret) @@ -1043,6 +1125,14 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, if (type & PERF_SAMPLE_TRANSACTION) ADD_FIELD(event_class, cw->data.u64, "perf_transaction"); + if (type & PERF_SAMPLE_CALLCHAIN) { + ADD_FIELD(event_class, cw->data.u32, "perf_callchain_size"); + ADD_FIELD(event_class, + bt_ctf_field_type_sequence_create( + cw->data.u64_hex, "perf_callchain_size"), + "perf_callchain"); + } + #undef ADD_FIELD return 0; } From f9f6f2a90343c5be3294d1336da055a99c28897d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Genevi=C3=A8ve=20Bastien?= Date: Thu, 27 Jul 2017 14:12:04 -0400 Subject: [PATCH 140/253] perf data: Add mmap[2] events to CTF conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds the mmap and mmap2 events to the CTF trace obtained from perf data. These events will allow CTF trace visualization tools like Trace Compass to automatically resolve the symbols of the callchain to the corresponding function or origin library. To include those events, one needs to convert with the --all option. Here follows an output of babeltrace: $ sudo perf data convert --all --to-ctf myctftrace $ babeltrace ./myctftrace [19:00:00.000000000] (+0.000000000) perf_mmap2: { cpu_id = 0 }, { pid = 638, tid = 638, start = 0x7F54AE39E000, filename = "/usr/lib/ld-2.25.so" } [19:00:00.000000000] (+0.000000000) perf_mmap2: { cpu_id = 0 }, { pid = 638, tid = 638, start = 0x7F54AE565000, filename = "/usr/lib/libudev.so.1.6.6" } [19:00:00.000000000] (+0.000000000) perf_mmap2: { cpu_id = 0 }, { pid = 638, tid = 638, start = 0x7FFC093EA000, filename = "[vdso]" } Signed-off-by: Geneviève Bastien Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Francis Deslauriers Cc: Julien Desfossez Cc: Mathieu Desnoyers Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170727181205.24843-2-gbastien@versatic.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-bt.c | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index eeb2590a3ddf..2346cecb8ea2 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -76,6 +76,8 @@ struct ctf_writer { struct bt_ctf_event_class *comm_class; struct bt_ctf_event_class *exit_class; struct bt_ctf_event_class *fork_class; + struct bt_ctf_event_class *mmap_class; + struct bt_ctf_event_class *mmap2_class; }; struct convert { @@ -915,6 +917,18 @@ __FUNC_PROCESS_NON_SAMPLE(exit, __NON_SAMPLE_SET_FIELD(fork, u32, ptid); __NON_SAMPLE_SET_FIELD(fork, u64, time); ) +__FUNC_PROCESS_NON_SAMPLE(mmap, + __NON_SAMPLE_SET_FIELD(mmap, u32, pid); + __NON_SAMPLE_SET_FIELD(mmap, u32, tid); + __NON_SAMPLE_SET_FIELD(mmap, u64_hex, start); + __NON_SAMPLE_SET_FIELD(mmap, string, filename); +) +__FUNC_PROCESS_NON_SAMPLE(mmap2, + __NON_SAMPLE_SET_FIELD(mmap2, u32, pid); + __NON_SAMPLE_SET_FIELD(mmap2, u32, tid); + __NON_SAMPLE_SET_FIELD(mmap2, u64_hex, start); + __NON_SAMPLE_SET_FIELD(mmap2, string, filename); +) #undef __NON_SAMPLE_SET_FIELD #undef __FUNC_PROCESS_NON_SAMPLE @@ -1254,6 +1268,19 @@ __FUNC_ADD_NON_SAMPLE_EVENT_CLASS(exit, __NON_SAMPLE_ADD_FIELD(u64, time); ) +__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(mmap, + __NON_SAMPLE_ADD_FIELD(u32, pid); + __NON_SAMPLE_ADD_FIELD(u32, tid); + __NON_SAMPLE_ADD_FIELD(u64_hex, start); + __NON_SAMPLE_ADD_FIELD(string, filename); +) + +__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(mmap2, + __NON_SAMPLE_ADD_FIELD(u32, pid); + __NON_SAMPLE_ADD_FIELD(u32, tid); + __NON_SAMPLE_ADD_FIELD(u64_hex, start); + __NON_SAMPLE_ADD_FIELD(string, filename); +) #undef __NON_SAMPLE_ADD_FIELD #undef __FUNC_ADD_NON_SAMPLE_EVENT_CLASS @@ -1269,6 +1296,12 @@ static int setup_non_sample_events(struct ctf_writer *cw, if (ret) return ret; ret = add_fork_event(cw); + if (ret) + return ret; + ret = add_mmap_event(cw); + if (ret) + return ret; + ret = add_mmap2_event(cw); if (ret) return ret; return 0; @@ -1572,6 +1605,8 @@ int bt_convert__perf2ctf(const char *input, const char *path, c.tool.comm = process_comm_event; c.tool.exit = process_exit_event; c.tool.fork = process_fork_event; + c.tool.mmap = process_mmap_event; + c.tool.mmap2 = process_mmap2_event; } err = perf_config(convert__config, &c); From 6b7007af728df7258bb60ed73099be3b59b3030e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Genevi=C3=A8ve=20Bastien?= Date: Thu, 27 Jul 2017 14:12:05 -0400 Subject: [PATCH 141/253] perf data: Add doc when no conversion support compiled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds documentation on the environment variables needed to the message telling that no conversion support is compiled in. Committer testing: $ make -C tools/perf install $ perf data convert --all --to-ctf myctftrace No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/ $ Signed-off-by: Geneviève Bastien Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Francis Deslauriers Cc: Julien Desfossez Cc: Mathieu Desnoyers Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170727181205.24843-3-gbastien@versatic.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index 0adb5f82335a..46cd8490baf4 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -69,7 +69,7 @@ static int cmd_data_convert(int argc, const char **argv) }; #ifndef HAVE_LIBBABELTRACE_SUPPORT - pr_err("No conversion support compiled in.\n"); + pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n"); return -1; #endif From c59796d53b1c96fd8a0a63119820fb81e06e4aa4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 30 Jul 2017 11:37:47 +0200 Subject: [PATCH 142/253] perf build: Clarify header version warning message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change this: Warning: arch/x86/include/asm/disabled-features.h differs from kernel Warning: arch/x86/include/asm/cpufeatures.h differs from kernel Warning: arch/powerpc/include/uapi/asm/kvm.h differs from kernel Warning: arch/s390/include/uapi/asm/kvm.h differs from kernel Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' Warning: Kernel ABI header at 'tools/arch/powerpc/include/uapi/asm/kvm.h' differs from latest version at 'arch/powerpc/include/uapi/asm/kvm.h' Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/kvm.h' differs from latest version at 'arch/s390/include/uapi/asm/kvm.h' ... to make it clearer what the warning is about, and to make it easier to diff the two versions when syncing up the files. Signed-off-by: Ingo Molnar Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: David Carrillo-Cisneros Cc: Francis Deslauriers Cc: Geneviève Bastien Cc: Jiri Olsa Cc: Julien Desfossez Cc: Martin Liška Cc: Mathieu Desnoyers Cc: Milian Wolff Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/r/20170730093747.qogjn3lp7ntwcgwg@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/check-headers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 9b6295809a3b..d87bc6af0ac2 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -47,7 +47,7 @@ check () { cmd="diff $opts ../$file ../../$file > /dev/null" test -f ../../$file && - eval $cmd || echo "Warning: $file differs from kernel" >&2 + eval $cmd || echo "Warning: Kernel ABI header at 'tools/$file' differs from latest version at '$file'" >&2 } From 8255e1efc193f110f1c9b8e40597a6c0c89fbedc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 30 Jul 2017 11:51:30 +0200 Subject: [PATCH 143/253] perf build: Clarify open-coded header version warning message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this patch we changed the header checks: perf build: Clarify header version warning message Unfortunately the header checks were copied to various places and thus the message got out of sync. Fix some of them here. Note that there's still old, misleading messages remaining in: tools/objtool/Makefile: || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true tools/objtool/Makefile: || echo "warning: objtool: orc_types.h differs from kernel" >&2 )) || true here objtool copied the perf message, plus: tools/perf/util/intel-pt-decoder/Build: || echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true here the PT code regressed over the original message and only emits a vague warning instead of specific file names... All of this should be consolidated into tools/Build/ and used in a consistent manner. Signed-off-by: Ingo Molnar Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: David Carrillo-Cisneros Cc: Francis Deslauriers Cc: Geneviève Bastien Cc: Jiri Olsa Cc: Julien Desfossez Cc: Martin Liška Cc: Mathieu Desnoyers Cc: Milian Wolff Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/r/20170730095130.bblldwxjz5hamybb@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/Makefile | 4 ++-- tools/perf/arch/x86/Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 1f5300e56b44..e87b5903f4bb 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -156,10 +156,10 @@ all_cmd: $(CMD_TARGETS) $(BPF_IN): force elfdep bpfdep @(test -f ../../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ - echo "Warning: tools/include/uapi/linux/bpf.h differs from kernel" >&2 )) || true + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @(test -f ../../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \ (diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \ - echo "Warning: tools/include/uapi/linux/bpf_common.h differs from kernel" >&2 )) || true + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf $(OUTPUT)libbpf.so: $(BPF_IN) diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 6c9211b18ec0..9a628a24c5c9 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -20,7 +20,7 @@ _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sys)/syscall_64.tbl $(systbl) @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \ - || echo "Warning: x86_64's syscall_64.tbl differs from kernel" >&2 )) || true + || echo "Warning: Kernel ABI header at 'tools/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@ clean:: From a40f61777b8c66cfd61c37b0102387578abb8ed0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 30 Jul 2017 11:52:32 +0200 Subject: [PATCH 144/253] tools headers: Sync kernel ABI headers with tooling headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sync up (copy) the following v4.13 kernel headers to the tooling headers: arch/arm/include/uapi/asm/kvm.h: arch/arm64/include/uapi/asm/kvm.h: arch/powerpc/include/uapi/asm/kvm.h: arch/s390/include/uapi/asm/kvm.h: - KVM ABI extensions, which do not affect perf tooling arch/x86/include/asm/cpufeatures.h: arch/x86/include/asm/disabled-features.h: - New PCID CPU feature on Intel CPUs - does not affect tooling. I.e. no real changes were needed to resolve the build warnings, just a plain copy of the latest kernel header version. Signed-off-by: Ingo Molnar Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: David Carrillo-Cisneros Cc: Francis Deslauriers Cc: Geneviève Bastien Cc: Jiri Olsa Cc: Julien Desfossez Cc: Martin Liška Cc: Mathieu Desnoyers Cc: Milian Wolff Cc: Namhyung Kim Cc: Paul Turner Cc: Peter Zijlstra Cc: Simon Que Cc: Stephane Eranian Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/r/20170730095232.4j4xigsoqwufl5hu@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm/include/uapi/asm/kvm.h | 8 ++++++++ tools/arch/arm64/include/uapi/asm/kvm.h | 3 +++ tools/arch/powerpc/include/uapi/asm/kvm.h | 6 ++++++ tools/arch/s390/include/uapi/asm/kvm.h | 12 ++++++++++++ tools/arch/x86/include/asm/cpufeatures.h | 2 ++ tools/arch/x86/include/asm/disabled-features.h | 4 +++- 6 files changed, 34 insertions(+), 1 deletion(-) diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 5e3c673fa3f4..5db2d4c6a55f 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -203,6 +203,14 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 +/* Device Control API on vcpu fd */ +#define KVM_ARM_VCPU_PMU_V3_CTRL 0 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_TIMER_CTRL 1 +#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 +#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 70eea2ecc663..9f3ca24bbcc6 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -232,6 +232,9 @@ struct kvm_arch_memory_slot { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_TIMER_CTRL 1 +#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 +#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 07fbeb927834..8cf8f0c96906 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -60,6 +60,12 @@ struct kvm_regs { #define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ +/* flags for kvm_run.flags */ +#define KVM_RUN_PPC_NMI_DISP_MASK (3 << 0) +#define KVM_RUN_PPC_NMI_DISP_FULLY_RECOV (1 << 0) +#define KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV (2 << 0) +#define KVM_RUN_PPC_NMI_DISP_NOT_RECOV (3 << 0) + /* * Feature bits indicate which sections of the sregs struct are valid, * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 3dd2a1d308dd..69d09c39bbcd 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -28,6 +28,7 @@ #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 #define KVM_DEV_FLIC_AISM 9 #define KVM_DEV_FLIC_AIRQ_INJECT 10 +#define KVM_DEV_FLIC_AISM_ALL 11 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. @@ -53,6 +54,11 @@ struct kvm_s390_ais_req { __u16 mode; }; +struct kvm_s390_ais_all { + __u8 simm; + __u8 nimm; +}; + #define KVM_S390_IO_ADAPTER_MASK 1 #define KVM_S390_IO_ADAPTER_MAP 2 #define KVM_S390_IO_ADAPTER_UNMAP 3 @@ -70,6 +76,7 @@ struct kvm_s390_io_adapter_req { #define KVM_S390_VM_TOD 1 #define KVM_S390_VM_CRYPTO 2 #define KVM_S390_VM_CPU_MODEL 3 +#define KVM_S390_VM_MIGRATION 4 /* kvm attributes for mem_ctrl */ #define KVM_S390_VM_MEM_ENABLE_CMMA 0 @@ -151,6 +158,11 @@ struct kvm_s390_vm_cpu_subfunc { #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 +/* kvm attributes for migration mode */ +#define KVM_S390_VM_MIGRATION_STOP 0 +#define KVM_S390_VM_MIGRATION_START 1 +#define KVM_S390_VM_MIGRATION_STATUS 2 + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 2701e5f8145b..14f0f2913364 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -196,6 +196,7 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ @@ -286,6 +287,7 @@ #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ +#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 5dff775af7cd..c10c9128f54e 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -21,11 +21,13 @@ # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) +# define DISABLE_PCID 0 #else # define DISABLE_VME 0 # define DISABLE_K6_MTRR 0 # define DISABLE_CYRIX_ARR 0 # define DISABLE_CENTAUR_MCR 0 +# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -49,7 +51,7 @@ #define DISABLED_MASK1 0 #define DISABLED_MASK2 0 #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) -#define DISABLED_MASK4 0 +#define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 #define DISABLED_MASK7 0 From d62c1d7213b77eb4952a873ad57727e991ac3306 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 10:45:07 -0300 Subject: [PATCH 145/253] tools headers: Fixup tools/include/uapi/linux/bpf.h copy of kernel ABI header In 2be7e212d541 ("bpf: add bpf_skb_adjust_room helper") BPF_ADJ_ROOM_NET was added to include/uapi/linux/bpf.h but BPF_ADJ_ROOM_NET_OPS was added to tools/include/uapi/linux/bpf.h, making these files differ, fix it by using the same name in both, BPF_ADJ_ROOM_NET, the one in the kernel headers copy. Cc: Adrian Hunter Cc: Daniel Borkmann Cc: David Ahern Cc: David S. Miller Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 2be7e212d541 ("bpf: add bpf_skb_adjust_room helper") Link: http://lkml.kernel.org/n/tip-2bmwovi9lymplyz6wsszppyf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ce2988be4f0e..8ff155ffcd84 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -643,7 +643,7 @@ enum bpf_func_id { /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { - BPF_ADJ_ROOM_NET_OPTS, + BPF_ADJ_ROOM_NET, }; /* user accessible mirror of in-kernel sk_buff. From 6375f0abeea886ec718d8b0aaf99818c7505de14 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 22:52:28 -0300 Subject: [PATCH 146/253] tools include uapi: Grab a copy of asm-generic/ioctls.h So that we can build on older systems where otherwise we would end up with: CC /tmp/build/perf/trace/beauty/ioctl.o trace/beauty/ioctl.c: In function 'ioctl__scnprintf_tty_cmd': trace/beauty/ioctl.c:25:17: error: 'TIOCGEXCL' undeclared (first use in this function) trace/beauty/ioctl.c:25:17: note: each undeclared identifier is reported only once for each function it appears in trace/beauty/ioctl.c:25:2: error: array index in initializer not of integer type trace/beauty/ioctl.c:25:2: error: (near initialization for 'ioctl_tty_cmd') This way we can build a tool on an older system and it will still be capable of processing perf.data files generated on newer systems. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-8qvkv6txwuzua6d0yvt65wl3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/ioctls.h | 118 ++++++++++++++++++++++++ tools/perf/MANIFEST | 1 + tools/perf/check-headers.sh | 1 + 3 files changed, 120 insertions(+) create mode 100644 tools/include/uapi/asm-generic/ioctls.h diff --git a/tools/include/uapi/asm-generic/ioctls.h b/tools/include/uapi/asm-generic/ioctls.h new file mode 100644 index 000000000000..14baf9f23a14 --- /dev/null +++ b/tools/include/uapi/asm-generic/ioctls.h @@ -0,0 +1,118 @@ +#ifndef __ASM_GENERIC_IOCTLS_H +#define __ASM_GENERIC_IOCTLS_H + +#include + +/* + * These are the most common definitions for tty ioctl numbers. + * Most of them do not use the recommended _IOC(), but there is + * probably some source code out there hardcoding the number, + * so we might as well use them for all new platforms. + * + * The architectures that use different values here typically + * try to be compatible with some Unix variants for the same + * architecture. + */ + +/* 0x54 is just a magic number to make these relatively unique ('T') */ + +#define TCGETS 0x5401 +#define TCSETS 0x5402 +#define TCSETSW 0x5403 +#define TCSETSF 0x5404 +#define TCGETA 0x5405 +#define TCSETA 0x5406 +#define TCSETAW 0x5407 +#define TCSETAF 0x5408 +#define TCSBRK 0x5409 +#define TCXONC 0x540A +#define TCFLSH 0x540B +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E +#define TIOCGPGRP 0x540F +#define TIOCSPGRP 0x5410 +#define TIOCOUTQ 0x5411 +#define TIOCSTI 0x5412 +#define TIOCGWINSZ 0x5413 +#define TIOCSWINSZ 0x5414 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define FIONREAD 0x541B +#define TIOCINQ FIONREAD +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +#define FIONBIO 0x5421 +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TCGETS2 _IOR('T', 0x2A, struct termios2) +#define TCSETS2 _IOW('T', 0x2B, struct termios2) +#define TCSETSW2 _IOW('T', 0x2C, struct termios2) +#define TCSETSF2 _IOW('T', 0x2D, struct termios2) +#define TIOCGRS485 0x542E +#ifndef TIOCSRS485 +#define TIOCSRS485 0x542F +#endif +#define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */ +#define TIOCGDEV _IOR('T', 0x32, unsigned int) /* Get primary device node of /dev/console */ +#define TCGETX 0x5432 /* SYS5 TCGETX compatibility */ +#define TCSETX 0x5433 +#define TCSETXF 0x5434 +#define TCSETXW 0x5435 +#define TIOCSIG _IOW('T', 0x36, int) /* pty: generate signal */ +#define TIOCVHANGUP 0x5437 +#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */ +#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ +#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ +#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ + +#define FIONCLEX 0x5450 +#define FIOCLEX 0x5451 +#define FIOASYNC 0x5452 +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ + +/* + * Some arches already define FIOQSIZE due to a historical + * conflict with a Hayes modem-specific ioctl value. + */ +#ifndef FIOQSIZE +# define FIOQSIZE 0x5460 +#endif + +/* Used for packet mode */ +#define TIOCPKT_DATA 0 +#define TIOCPKT_FLUSHREAD 1 +#define TIOCPKT_FLUSHWRITE 2 +#define TIOCPKT_STOP 4 +#define TIOCPKT_START 8 +#define TIOCPKT_NOSTOP 16 +#define TIOCPKT_DOSTOP 32 +#define TIOCPKT_IOCTL 64 + +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ + +#endif /* __ASM_GENERIC_IOCTLS_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 705bdb147e73..2ee433c52130 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -71,6 +71,7 @@ tools/include/linux/kernel.h tools/include/linux/list.h tools/include/linux/log2.h tools/include/uapi/asm-generic/fcntl.h +tools/include/uapi/asm-generic/ioctls.h tools/include/uapi/asm-generic/mman-common.h tools/include/uapi/asm-generic/mman.h tools/include/uapi/linux/bpf.h diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index d87bc6af0ac2..ff0a5fabf055 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -31,6 +31,7 @@ include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h include/asm-generic/bitops/fls64.h include/linux/coresight-pmu.h +include/uapi/asm-generic/ioctls.h include/uapi/asm-generic/mman-common.h ' From 470de0f39e901611c366fc3d584307ba5d13406a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 22:55:12 -0300 Subject: [PATCH 147/253] tools perf: Do not check spaces/blank lines when checking header file copy drift We copy headers from include/, arch/ to allow tools/ use defines, structs from newer kernels and still be able to build on older systems. We then, as part of a build, check if those copies got out of sync, when we emit a warning, so that we can check if something needs to be reflected on the tools, e.g. a 'perf trace' syscall argument beautifier needs tweaking. But we don't have to be super strict with that, for instance, extra spaces, tabs or blank lines aren't problematic, so change check-headers.sh to have "--ignore-blank-lines --ignore-space-change" as default "diff" arguments. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-d8emqpdc3m2qtzt1ei8ra2tf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/check-headers.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index ff0a5fabf055..3f0c6577f8cc 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -37,7 +37,7 @@ include/uapi/asm-generic/mman-common.h check () { file=$1 - opts= + opts="--ignore-blank-lines --ignore-space-change" shift while [ -n "$*" ]; do @@ -58,7 +58,7 @@ for i in $HEADERS; do done # diff with extra ignore lines -check arch/x86/lib/memcpy_64.S -B -I "^EXPORT_SYMBOL" -I "^#include " -check arch/x86/lib/memset_64.S -B -I "^EXPORT_SYMBOL" -I "^#include " -check include/uapi/asm-generic/mman.h -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>" -check include/uapi/linux/mman.h -B -I "^#include <\(uapi/\)*asm/mman.h>" +check arch/x86/lib/memcpy_64.S -I "^EXPORT_SYMBOL" -I "^#include " +check arch/x86/lib/memset_64.S -I "^EXPORT_SYMBOL" -I "^#include " +check include/uapi/asm-generic/mman.h -I "^#include <\(uapi/\)*asm-generic/mman-common.h>" +check include/uapi/linux/mman.h -I "^#include <\(uapi/\)*asm/mman.h>" From f1d6cb2d8cdfb210f9b4a0f59e9168ac0f5db774 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 10:45:07 -0300 Subject: [PATCH 148/253] tools headers: Fixup tools/include/uapi/linux/bpf.h copy of kernel ABI header In 04df41e343db ("bpf: update tools/include/uapi/linux/bpf.h") the files added in 40304b2a1567 ("bpf: BPF support for sock_ops") were added to tools/include, but not in a verbatim way, missing the comments, which ends up triggering this warning when build tools/perf/: make: Entering directory '/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j4' parallel build Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h' Make sure the the lines are equal, to fix the simple header copy drift detector in tools/perf/. Cc: Adrian Hunter Cc: Daniel Borkmann Cc: David Ahern Cc: David S. Miller Cc: Jiri Olsa Cc: Lawrence Brakmo Cc: Namhyung Kim Cc: Wang Nan Fixes: 04df41e343db ("bpf: update tools/include/uapi/linux/bpf.h") Link: http://lkml.kernel.org/n/tip-z9qyyqht9qq3yyxu76sfy0dh@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/bpf.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8ff155ffcd84..e99e3e6f8b37 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -750,6 +750,8 @@ struct bpf_map_info { /* User bpf_sock_ops struct to access socket values and specify request ops * and their replies. + * Some of this fields are in network (bigendian) byte order and may need + * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). * New fields can only be added at the end of this structure */ struct bpf_sock_ops { @@ -759,12 +761,12 @@ struct bpf_sock_ops { __u32 replylong[4]; }; __u32 family; - __u32 remote_ip4; - __u32 local_ip4; - __u32 remote_ip6[4]; - __u32 local_ip6[4]; - __u32 remote_port; - __u32 local_port; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ }; /* List of known BPF sock_ops operators. From 1cc47f2d46206d67285aea0ca7e8450af571da13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 13:20:14 -0300 Subject: [PATCH 149/253] perf trace beauty ioctl: Improve 'cmd' beautifier By using the _IOC_(DIR,NR,TYPE,SIZE) macros to lookup a 'type' keyed table that then gets indexed by 'nr', falling back to a notation similar to the one used by 'strace', only more compact, i.e.: 474.356 ( 0.007 ms): gnome-shell/22401 ioctl(fd: 8, cmd: (READ|WRITE, 0x64, 0xae, 0x1c), arg: 0x7ffc934f7880) = 0 474.369 ( 0.053 ms): gnome-shell/22401 ioctl(fd: 8, cmd: (READ|WRITE, 0x64, 0xb0, 0x18), arg: 0x7ffc934f77d0) = 0 505.055 ( 0.014 ms): gnome-shell/22401 ioctl(fd: 8, cmd: (READ|WRITE, 0x64, 0xaf, 0x4), arg: 0x7ffc934f741c) = 0 This also moves it out of builtin-trace.c and into trace/beauty/ioctl.c to better compartimentalize all these formatters. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-s3enursdxsvnhdomh6qlte4g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 44 +--------------- tools/perf/trace/beauty/Build | 3 ++ tools/perf/trace/beauty/beauty.h | 3 ++ tools/perf/trace/beauty/ioctl.c | 88 ++++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 43 deletions(-) create mode 100644 tools/perf/trace/beauty/ioctl.c diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 05d24b6570ee..de02413a25d3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -338,20 +338,6 @@ size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, return scnprintf(bf, size, "%d", arg->val); } -#if defined(__i386__) || defined(__x86_64__) -/* - * FIXME: Make this available to all arches as soon as the ioctl beautifier - * gets rewritten to support all arches. - */ -static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size, - struct syscall_arg *arg) -{ - return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg); -} - -#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray -#endif /* defined(__i386__) || defined(__x86_64__) */ - #ifndef AT_FDCWD #define AT_FDCWD -100 #endif @@ -525,33 +511,6 @@ static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags -#if defined(__i386__) || defined(__x86_64__) -/* - * FIXME: Make this available to all arches. - */ -#define TCGETS 0x5401 - -static const char *tioctls[] = { - "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", - "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", - "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", - "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC", - "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX", - "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO", - "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK", - "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2", - "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", - "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", - "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL", - [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", - "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", - "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", - "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE", -}; - -static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); -#endif /* defined(__i386__) || defined(__x86_64__) */ - #ifndef GRND_NONBLOCK #define GRND_NONBLOCK 0x0001 #endif @@ -670,8 +629,7 @@ static struct syscall_fmt { /* * FIXME: Make this available to all arches. */ - [1] = { .scnprintf = SCA_STRHEXARRAY, /* cmd */ - .parm = &strarray__tioctls, }, + [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ }, [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, #else [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index eaa1e8e8e100..6f3f159f97e0 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -1,3 +1,6 @@ libperf-y += clone.o libperf-y += fcntl.o +ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) +libperf-y += ioctl.o +endif libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 69a5c8a2d420..47a36a8eb842 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -75,6 +75,9 @@ size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FCNTL_ARG syscall_arg__scnprintf_fcntl_arg +size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd + size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c new file mode 100644 index 000000000000..5f0dba8aaa88 --- /dev/null +++ b/tools/perf/trace/beauty/ioctl.c @@ -0,0 +1,88 @@ +/* + * trace/beauty/ioctl.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include + +/* + * FIXME: to support all arches we have to improve this, for + * now, to build on older systems without things like TIOCGEXCL, + * get it directly from our copy. + * + * Right now only x86 is being supported for beautifying ioctl args + * in 'perf trace', see tools/perf/trace/beauty/Build and builtin-trace.c + */ +#include + +static size_t ioctl__scnprintf_tty_cmd(int nr, char *bf, size_t size) +{ + static const char *ioctl_tty_cmd[] = { + "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", + "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", "TIOCSCTTY", + "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", "TIOCGWINSZ", "TIOCSWINSZ", + "TIOCMGET", "TIOCMBIS", "TIOCMBIC", "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", + "FIONREAD", "TIOCLINUX", "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", + "FIONBIO", "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", + [_IOC_NR(TIOCSBRK)] = "TIOCSBRK", "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", + "TCSETSW2", "TCSETSF2", "TIOCGRS48", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", + "TIOCGDEV", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", "TIOCVHANGUP", "TIOCGPKT", + "TIOCGPTLCK", [_IOC_NR(TIOCGEXCL)] = "TIOCGEXCL", "TIOCGPTPEER", + [_IOC_NR(FIONCLEX)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", + "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", + "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", + "TIOCMIWAIT", "TIOCGICOUNT", }; + static DEFINE_STRARRAY(ioctl_tty_cmd); + + if (nr < strarray__ioctl_tty_cmd.nr_entries && strarray__ioctl_tty_cmd.entries[nr] != NULL) + return scnprintf(bf, size, "%s", strarray__ioctl_tty_cmd.entries[nr]); + + return scnprintf(bf, size, "(%#x, %#x)", 'T', nr); +} + +static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) +{ + int dir = _IOC_DIR(cmd), + type = _IOC_TYPE(cmd), + nr = _IOC_NR(cmd), + sz = _IOC_SIZE(cmd); + int printed = 0; + static const struct ioctl_type { + int type; + size_t (*scnprintf)(int nr, char *bf, size_t size); + } ioctl_types[] = { /* Must be ordered by type */ + { .type = 'T', .scnprintf = ioctl__scnprintf_tty_cmd, } + }; + const int nr_types = ARRAY_SIZE(ioctl_types); + + if (type >= ioctl_types[0].type && type <= ioctl_types[nr_types - 1].type) { + const int index = type - ioctl_types[0].type; + + if (ioctl_types[index].scnprintf != NULL) + return ioctl_types[index].scnprintf(nr, bf, size); + } + + printed += scnprintf(bf + printed, size - printed, "%c", '('); + + if (dir == _IOC_NONE) { + printed += scnprintf(bf + printed, size - printed, "%s", "NONE"); + } else { + if (dir & _IOC_READ) + printed += scnprintf(bf + printed, size - printed, "%s", "READ"); + if (dir & _IOC_WRITE) + printed += scnprintf(bf + printed, size - printed, "%s%s", dir & _IOC_READ ? "|" : "", "WRITE"); + } + + return printed + scnprintf(bf + printed, size - printed, ", %#x, %#x, %#x)", type, nr, sz); +} + +size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long cmd = arg->val; + + return ioctl__scnprintf_cmd(cmd, bf, size); +} From c1737f2b7810cf4cd353f3931d2d23e81b591f90 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 16:45:32 -0300 Subject: [PATCH 150/253] tools include uapi: Grab copies of drm/{drm,i915_drm}.h We will use it to generate tables for beautifying ioctl's 'cmd' arg. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-bqoq114h917u6ggazn8m1w0t@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 933 ++++++++++++++++++ tools/include/uapi/drm/i915_drm.h | 1474 +++++++++++++++++++++++++++++ tools/perf/MANIFEST | 2 + tools/perf/check-headers.sh | 2 + 4 files changed, 2411 insertions(+) create mode 100644 tools/include/uapi/drm/drm.h create mode 100644 tools/include/uapi/drm/i915_drm.h diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h new file mode 100644 index 000000000000..101593ab10ac --- /dev/null +++ b/tools/include/uapi/drm/drm.h @@ -0,0 +1,933 @@ +/** + * \file drm.h + * Header for the Direct Rendering Manager + * + * \author Rickard E. (Rik) Faith + * + * \par Acknowledgments: + * Dec 1999, Richard Henderson , move to generic \c cmpxchg. + */ + +/* + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DRM_H_ +#define _DRM_H_ + +#if defined(__KERNEL__) + +#include +#include +typedef unsigned int drm_handle_t; + +#elif defined(__linux__) + +#include +#include +typedef unsigned int drm_handle_t; + +#else /* One of the BSDs */ + +#include +#include +typedef int8_t __s8; +typedef uint8_t __u8; +typedef int16_t __s16; +typedef uint16_t __u16; +typedef int32_t __s32; +typedef uint32_t __u32; +typedef int64_t __s64; +typedef uint64_t __u64; +typedef size_t __kernel_size_t; +typedef unsigned long drm_handle_t; + +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */ +#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */ +#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */ +#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */ + +#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */ +#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */ +#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD) +#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT) +#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) + +typedef unsigned int drm_context_t; +typedef unsigned int drm_drawable_t; +typedef unsigned int drm_magic_t; + +/** + * Cliprect. + * + * \warning: If you change this structure, make sure you change + * XF86DRIClipRectRec in the server as well + * + * \note KW: Actually it's illegal to change either for + * backwards-compatibility reasons. + */ +struct drm_clip_rect { + unsigned short x1; + unsigned short y1; + unsigned short x2; + unsigned short y2; +}; + +/** + * Drawable information. + */ +struct drm_drawable_info { + unsigned int num_rects; + struct drm_clip_rect *rects; +}; + +/** + * Texture region, + */ +struct drm_tex_region { + unsigned char next; + unsigned char prev; + unsigned char in_use; + unsigned char padding; + unsigned int age; +}; + +/** + * Hardware lock. + * + * The lock structure is a simple cache-line aligned integer. To avoid + * processor bus contention on a multiprocessor system, there should not be any + * other data stored in the same cache line. + */ +struct drm_hw_lock { + __volatile__ unsigned int lock; /**< lock variable */ + char padding[60]; /**< Pad to cache line */ +}; + +/** + * DRM_IOCTL_VERSION ioctl argument type. + * + * \sa drmGetVersion(). + */ +struct drm_version { + int version_major; /**< Major version */ + int version_minor; /**< Minor version */ + int version_patchlevel; /**< Patch level */ + __kernel_size_t name_len; /**< Length of name buffer */ + char __user *name; /**< Name of driver */ + __kernel_size_t date_len; /**< Length of date buffer */ + char __user *date; /**< User-space buffer to hold date */ + __kernel_size_t desc_len; /**< Length of desc buffer */ + char __user *desc; /**< User-space buffer to hold desc */ +}; + +/** + * DRM_IOCTL_GET_UNIQUE ioctl argument type. + * + * \sa drmGetBusid() and drmSetBusId(). + */ +struct drm_unique { + __kernel_size_t unique_len; /**< Length of unique */ + char __user *unique; /**< Unique name for driver instantiation */ +}; + +struct drm_list { + int count; /**< Length of user-space structures */ + struct drm_version __user *version; +}; + +struct drm_block { + int unused; +}; + +/** + * DRM_IOCTL_CONTROL ioctl argument type. + * + * \sa drmCtlInstHandler() and drmCtlUninstHandler(). + */ +struct drm_control { + enum { + DRM_ADD_COMMAND, + DRM_RM_COMMAND, + DRM_INST_HANDLER, + DRM_UNINST_HANDLER + } func; + int irq; +}; + +/** + * Type of memory to map. + */ +enum drm_map_type { + _DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */ + _DRM_REGISTERS = 1, /**< no caching, no core dump */ + _DRM_SHM = 2, /**< shared, cached */ + _DRM_AGP = 3, /**< AGP/GART */ + _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */ + _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ +}; + +/** + * Memory mapping flags. + */ +enum drm_map_flags { + _DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */ + _DRM_READ_ONLY = 0x02, + _DRM_LOCKED = 0x04, /**< shared, cached, locked */ + _DRM_KERNEL = 0x08, /**< kernel requires access */ + _DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */ + _DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */ + _DRM_REMOVABLE = 0x40, /**< Removable mapping */ + _DRM_DRIVER = 0x80 /**< Managed by driver */ +}; + +struct drm_ctx_priv_map { + unsigned int ctx_id; /**< Context requesting private mapping */ + void *handle; /**< Handle of map */ +}; + +/** + * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls + * argument type. + * + * \sa drmAddMap(). + */ +struct drm_map { + unsigned long offset; /**< Requested physical address (0 for SAREA)*/ + unsigned long size; /**< Requested physical size (bytes) */ + enum drm_map_type type; /**< Type of memory to map */ + enum drm_map_flags flags; /**< Flags */ + void *handle; /**< User-space: "Handle" to pass to mmap() */ + /**< Kernel-space: kernel-virtual address */ + int mtrr; /**< MTRR slot used */ + /* Private data */ +}; + +/** + * DRM_IOCTL_GET_CLIENT ioctl argument type. + */ +struct drm_client { + int idx; /**< Which client desired? */ + int auth; /**< Is client authenticated? */ + unsigned long pid; /**< Process ID */ + unsigned long uid; /**< User ID */ + unsigned long magic; /**< Magic */ + unsigned long iocs; /**< Ioctl count */ +}; + +enum drm_stat_type { + _DRM_STAT_LOCK, + _DRM_STAT_OPENS, + _DRM_STAT_CLOSES, + _DRM_STAT_IOCTLS, + _DRM_STAT_LOCKS, + _DRM_STAT_UNLOCKS, + _DRM_STAT_VALUE, /**< Generic value */ + _DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */ + _DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */ + + _DRM_STAT_IRQ, /**< IRQ */ + _DRM_STAT_PRIMARY, /**< Primary DMA bytes */ + _DRM_STAT_SECONDARY, /**< Secondary DMA bytes */ + _DRM_STAT_DMA, /**< DMA */ + _DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */ + _DRM_STAT_MISSED /**< Missed DMA opportunity */ + /* Add to the *END* of the list */ +}; + +/** + * DRM_IOCTL_GET_STATS ioctl argument type. + */ +struct drm_stats { + unsigned long count; + struct { + unsigned long value; + enum drm_stat_type type; + } data[15]; +}; + +/** + * Hardware locking flags. + */ +enum drm_lock_flags { + _DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */ + _DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */ + _DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */ + _DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */ + /* These *HALT* flags aren't supported yet + -- they will be used to support the + full-screen DGA-like mode. */ + _DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */ + _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ +}; + +/** + * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. + * + * \sa drmGetLock() and drmUnlock(). + */ +struct drm_lock { + int context; + enum drm_lock_flags flags; +}; + +/** + * DMA flags + * + * \warning + * These values \e must match xf86drm.h. + * + * \sa drm_dma. + */ +enum drm_dma_flags { + /* Flags for DMA buffer dispatch */ + _DRM_DMA_BLOCK = 0x01, /**< + * Block until buffer dispatched. + * + * \note The buffer may not yet have + * been processed by the hardware -- + * getting a hardware lock with the + * hardware quiescent will ensure + * that the buffer has been + * processed. + */ + _DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */ + _DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */ + + /* Flags for DMA buffer request */ + _DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */ + _DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */ + _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ +}; + +/** + * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. + * + * \sa drmAddBufs(). + */ +struct drm_buf_desc { + int count; /**< Number of buffers of this size */ + int size; /**< Size in bytes */ + int low_mark; /**< Low water mark */ + int high_mark; /**< High water mark */ + enum { + _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */ + _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */ + _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */ + _DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */ + _DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */ + } flags; + unsigned long agp_start; /**< + * Start address of where the AGP buffers are + * in the AGP aperture + */ +}; + +/** + * DRM_IOCTL_INFO_BUFS ioctl argument type. + */ +struct drm_buf_info { + int count; /**< Entries in list */ + struct drm_buf_desc __user *list; +}; + +/** + * DRM_IOCTL_FREE_BUFS ioctl argument type. + */ +struct drm_buf_free { + int count; + int __user *list; +}; + +/** + * Buffer information + * + * \sa drm_buf_map. + */ +struct drm_buf_pub { + int idx; /**< Index into the master buffer list */ + int total; /**< Buffer size */ + int used; /**< Amount of buffer in use (for DMA) */ + void __user *address; /**< Address of buffer */ +}; + +/** + * DRM_IOCTL_MAP_BUFS ioctl argument type. + */ +struct drm_buf_map { + int count; /**< Length of the buffer list */ +#ifdef __cplusplus + void __user *virt; +#else + void __user *virtual; /**< Mmap'd area in user-virtual */ +#endif + struct drm_buf_pub __user *list; /**< Buffer information */ +}; + +/** + * DRM_IOCTL_DMA ioctl argument type. + * + * Indices here refer to the offset into the buffer list in drm_buf_get. + * + * \sa drmDMA(). + */ +struct drm_dma { + int context; /**< Context handle */ + int send_count; /**< Number of buffers to send */ + int __user *send_indices; /**< List of handles to buffers */ + int __user *send_sizes; /**< Lengths of data to send */ + enum drm_dma_flags flags; /**< Flags */ + int request_count; /**< Number of buffers requested */ + int request_size; /**< Desired size for buffers */ + int __user *request_indices; /**< Buffer information */ + int __user *request_sizes; + int granted_count; /**< Number of buffers granted */ +}; + +enum drm_ctx_flags { + _DRM_CONTEXT_PRESERVED = 0x01, + _DRM_CONTEXT_2DONLY = 0x02 +}; + +/** + * DRM_IOCTL_ADD_CTX ioctl argument type. + * + * \sa drmCreateContext() and drmDestroyContext(). + */ +struct drm_ctx { + drm_context_t handle; + enum drm_ctx_flags flags; +}; + +/** + * DRM_IOCTL_RES_CTX ioctl argument type. + */ +struct drm_ctx_res { + int count; + struct drm_ctx __user *contexts; +}; + +/** + * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. + */ +struct drm_draw { + drm_drawable_t handle; +}; + +/** + * DRM_IOCTL_UPDATE_DRAW ioctl argument type. + */ +typedef enum { + DRM_DRAWABLE_CLIPRECTS +} drm_drawable_info_type_t; + +struct drm_update_draw { + drm_drawable_t handle; + unsigned int type; + unsigned int num; + unsigned long long data; +}; + +/** + * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. + */ +struct drm_auth { + drm_magic_t magic; +}; + +/** + * DRM_IOCTL_IRQ_BUSID ioctl argument type. + * + * \sa drmGetInterruptFromBusID(). + */ +struct drm_irq_busid { + int irq; /**< IRQ number */ + int busnum; /**< bus number */ + int devnum; /**< device number */ + int funcnum; /**< function number */ +}; + +enum drm_vblank_seq_type { + _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ + _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ + /* bits 1-6 are reserved for high crtcs */ + _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, + _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ + _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ + _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ + _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ + _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ +}; +#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 + +#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) +#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ + _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) + +struct drm_wait_vblank_request { + enum drm_vblank_seq_type type; + unsigned int sequence; + unsigned long signal; +}; + +struct drm_wait_vblank_reply { + enum drm_vblank_seq_type type; + unsigned int sequence; + long tval_sec; + long tval_usec; +}; + +/** + * DRM_IOCTL_WAIT_VBLANK ioctl argument type. + * + * \sa drmWaitVBlank(). + */ +union drm_wait_vblank { + struct drm_wait_vblank_request request; + struct drm_wait_vblank_reply reply; +}; + +#define _DRM_PRE_MODESET 1 +#define _DRM_POST_MODESET 2 + +/** + * DRM_IOCTL_MODESET_CTL ioctl argument type + * + * \sa drmModesetCtl(). + */ +struct drm_modeset_ctl { + __u32 crtc; + __u32 cmd; +}; + +/** + * DRM_IOCTL_AGP_ENABLE ioctl argument type. + * + * \sa drmAgpEnable(). + */ +struct drm_agp_mode { + unsigned long mode; /**< AGP mode */ +}; + +/** + * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. + * + * \sa drmAgpAlloc() and drmAgpFree(). + */ +struct drm_agp_buffer { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for binding / unbinding */ + unsigned long type; /**< Type of memory to allocate */ + unsigned long physical; /**< Physical used by i810 */ +}; + +/** + * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. + * + * \sa drmAgpBind() and drmAgpUnbind(). + */ +struct drm_agp_binding { + unsigned long handle; /**< From drm_agp_buffer */ + unsigned long offset; /**< In bytes -- will round to page boundary */ +}; + +/** + * DRM_IOCTL_AGP_INFO ioctl argument type. + * + * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), + * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(), + * drmAgpVendorId() and drmAgpDeviceId(). + */ +struct drm_agp_info { + int agp_version_major; + int agp_version_minor; + unsigned long mode; + unsigned long aperture_base; /* physical address */ + unsigned long aperture_size; /* bytes */ + unsigned long memory_allowed; /* bytes */ + unsigned long memory_used; + + /* PCI information */ + unsigned short id_vendor; + unsigned short id_device; +}; + +/** + * DRM_IOCTL_SG_ALLOC ioctl argument type. + */ +struct drm_scatter_gather { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for mapping / unmapping */ +}; + +/** + * DRM_IOCTL_SET_VERSION ioctl argument type. + */ +struct drm_set_version { + int drm_di_major; + int drm_di_minor; + int drm_dd_major; + int drm_dd_minor; +}; + +/** DRM_IOCTL_GEM_CLOSE ioctl argument type */ +struct drm_gem_close { + /** Handle of the object to be closed. */ + __u32 handle; + __u32 pad; +}; + +/** DRM_IOCTL_GEM_FLINK ioctl argument type */ +struct drm_gem_flink { + /** Handle for the object being named */ + __u32 handle; + + /** Returned global name */ + __u32 name; +}; + +/** DRM_IOCTL_GEM_OPEN ioctl argument type */ +struct drm_gem_open { + /** Name of object being opened */ + __u32 name; + + /** Returned handle for the object */ + __u32 handle; + + /** Returned size of the object */ + __u64 size; +}; + +#define DRM_CAP_DUMB_BUFFER 0x1 +#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +#define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +#define DRM_CAP_PRIME 0x5 +#define DRM_PRIME_CAP_IMPORT 0x1 +#define DRM_PRIME_CAP_EXPORT 0x2 +#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +#define DRM_CAP_ASYNC_PAGE_FLIP 0x7 +/* + * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight + * combination for the hardware cursor. The intention is that a hardware + * agnostic userspace can query a cursor plane size to use. + * + * Note that the cross-driver contract is to merely return a valid size; + * drivers are free to attach another meaning on top, eg. i915 returns the + * maximum plane size. + */ +#define DRM_CAP_CURSOR_WIDTH 0x8 +#define DRM_CAP_CURSOR_HEIGHT 0x9 +#define DRM_CAP_ADDFB2_MODIFIERS 0x10 +#define DRM_CAP_PAGE_FLIP_TARGET 0x11 +#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +#define DRM_CAP_SYNCOBJ 0x13 + +/** DRM_IOCTL_GET_CAP ioctl argument type */ +struct drm_get_cap { + __u64 capability; + __u64 value; +}; + +/** + * DRM_CLIENT_CAP_STEREO_3D + * + * if set to 1, the DRM core will expose the stereo 3D capabilities of the + * monitor by advertising the supported 3D layouts in the flags of struct + * drm_mode_modeinfo. + */ +#define DRM_CLIENT_CAP_STEREO_3D 1 + +/** + * DRM_CLIENT_CAP_UNIVERSAL_PLANES + * + * If set to 1, the DRM core will expose all planes (overlay, primary, and + * cursor) to userspace. + */ +#define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 + +/** + * DRM_CLIENT_CAP_ATOMIC + * + * If set to 1, the DRM core will expose atomic properties to userspace + */ +#define DRM_CLIENT_CAP_ATOMIC 3 + +/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +struct drm_set_client_cap { + __u64 capability; + __u64 value; +}; + +#define DRM_RDWR O_RDWR +#define DRM_CLOEXEC O_CLOEXEC +struct drm_prime_handle { + __u32 handle; + + /** Flags.. only applicable for handle->fd */ + __u32 flags; + + /** Returned dmabuf file descriptor */ + __s32 fd; +}; + +struct drm_syncobj_create { + __u32 handle; + __u32 flags; +}; + +struct drm_syncobj_destroy { + __u32 handle; + __u32 pad; +}; + +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +struct drm_syncobj_handle { + __u32 handle; + __u32 flags; + + __s32 fd; + __u32 pad; +}; + +#if defined(__cplusplus) +} +#endif + +#include "drm_mode.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_IOCTL_BASE 'd' +#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) +#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type) +#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type) +#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type) + +#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) +#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique) +#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth) +#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid) +#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map) +#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) +#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) +#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) +#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) +#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) +#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) +#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) +#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap) +#define DRM_IOCTL_SET_CLIENT_CAP DRM_IOW( 0x0d, struct drm_set_client_cap) + +#define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique) +#define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth) +#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block) +#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block) +#define DRM_IOCTL_CONTROL DRM_IOW( 0x14, struct drm_control) +#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map) +#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc) +#define DRM_IOCTL_MARK_BUFS DRM_IOW( 0x17, struct drm_buf_desc) +#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info) +#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map) +#define DRM_IOCTL_FREE_BUFS DRM_IOW( 0x1a, struct drm_buf_free) + +#define DRM_IOCTL_RM_MAP DRM_IOW( 0x1b, struct drm_map) + +#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW( 0x1c, struct drm_ctx_priv_map) +#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map) + +#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e) +#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f) + +#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx) +#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx) +#define DRM_IOCTL_MOD_CTX DRM_IOW( 0x22, struct drm_ctx) +#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx) +#define DRM_IOCTL_SWITCH_CTX DRM_IOW( 0x24, struct drm_ctx) +#define DRM_IOCTL_NEW_CTX DRM_IOW( 0x25, struct drm_ctx) +#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res) +#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, struct drm_draw) +#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw) +#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma) +#define DRM_IOCTL_LOCK DRM_IOW( 0x2a, struct drm_lock) +#define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock) +#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) + +#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) +#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) + +#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) +#define DRM_IOCTL_AGP_RELEASE DRM_IO( 0x31) +#define DRM_IOCTL_AGP_ENABLE DRM_IOW( 0x32, struct drm_agp_mode) +#define DRM_IOCTL_AGP_INFO DRM_IOR( 0x33, struct drm_agp_info) +#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer) +#define DRM_IOCTL_AGP_FREE DRM_IOW( 0x35, struct drm_agp_buffer) +#define DRM_IOCTL_AGP_BIND DRM_IOW( 0x36, struct drm_agp_binding) +#define DRM_IOCTL_AGP_UNBIND DRM_IOW( 0x37, struct drm_agp_binding) + +#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather) +#define DRM_IOCTL_SG_FREE DRM_IOW( 0x39, struct drm_scatter_gather) + +#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) + +#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw) + +#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res) +#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc) +#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc) +#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor) +#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut) +#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut) +#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder) +#define DRM_IOCTL_MODE_GETCONNECTOR DRM_IOWR(0xA7, struct drm_mode_get_connector) +#define DRM_IOCTL_MODE_ATTACHMODE DRM_IOWR(0xA8, struct drm_mode_mode_cmd) /* deprecated (never worked) */ +#define DRM_IOCTL_MODE_DETACHMODE DRM_IOWR(0xA9, struct drm_mode_mode_cmd) /* deprecated (never worked) */ + +#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property) +#define DRM_IOCTL_MODE_SETPROPERTY DRM_IOWR(0xAB, struct drm_mode_connector_set_property) +#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) +#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) +#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) +#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) +#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) + +#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) +#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) +#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) +#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res) +#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane) +#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane) +#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2) +#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES DRM_IOWR(0xB9, struct drm_mode_obj_get_properties) +#define DRM_IOCTL_MODE_OBJ_SETPROPERTY DRM_IOWR(0xBA, struct drm_mode_obj_set_property) +#define DRM_IOCTL_MODE_CURSOR2 DRM_IOWR(0xBB, struct drm_mode_cursor2) +#define DRM_IOCTL_MODE_ATOMIC DRM_IOWR(0xBC, struct drm_mode_atomic) +#define DRM_IOCTL_MODE_CREATEPROPBLOB DRM_IOWR(0xBD, struct drm_mode_create_blob) +#define DRM_IOCTL_MODE_DESTROYPROPBLOB DRM_IOWR(0xBE, struct drm_mode_destroy_blob) + +#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create) +#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) +#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) + +/** + * Device specific ioctls should only be in their respective headers + * The device specific ioctl range is from 0x40 to 0x9f. + * Generic IOCTLS restart at 0xA0. + * + * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and + * drmCommandReadWrite(). + */ +#define DRM_COMMAND_BASE 0x40 +#define DRM_COMMAND_END 0xA0 + +/** + * Header for events written back to userspace on the drm fd. The + * type defines the type of event, the length specifies the total + * length of the event (including the header), and user_data is + * typically a 64 bit value passed with the ioctl that triggered the + * event. A read on the drm fd will always only return complete + * events, that is, if for example the read buffer is 100 bytes, and + * there are two 64 byte events pending, only one will be returned. + * + * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and + * up are chipset specific. + */ +struct drm_event { + __u32 type; + __u32 length; +}; + +#define DRM_EVENT_VBLANK 0x01 +#define DRM_EVENT_FLIP_COMPLETE 0x02 + +struct drm_event_vblank { + struct drm_event base; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; + __u32 sequence; + __u32 crtc_id; /* 0 on older kernels that do not support this */ +}; + +/* typedef area */ +#ifndef __KERNEL__ +typedef struct drm_clip_rect drm_clip_rect_t; +typedef struct drm_drawable_info drm_drawable_info_t; +typedef struct drm_tex_region drm_tex_region_t; +typedef struct drm_hw_lock drm_hw_lock_t; +typedef struct drm_version drm_version_t; +typedef struct drm_unique drm_unique_t; +typedef struct drm_list drm_list_t; +typedef struct drm_block drm_block_t; +typedef struct drm_control drm_control_t; +typedef enum drm_map_type drm_map_type_t; +typedef enum drm_map_flags drm_map_flags_t; +typedef struct drm_ctx_priv_map drm_ctx_priv_map_t; +typedef struct drm_map drm_map_t; +typedef struct drm_client drm_client_t; +typedef enum drm_stat_type drm_stat_type_t; +typedef struct drm_stats drm_stats_t; +typedef enum drm_lock_flags drm_lock_flags_t; +typedef struct drm_lock drm_lock_t; +typedef enum drm_dma_flags drm_dma_flags_t; +typedef struct drm_buf_desc drm_buf_desc_t; +typedef struct drm_buf_info drm_buf_info_t; +typedef struct drm_buf_free drm_buf_free_t; +typedef struct drm_buf_pub drm_buf_pub_t; +typedef struct drm_buf_map drm_buf_map_t; +typedef struct drm_dma drm_dma_t; +typedef union drm_wait_vblank drm_wait_vblank_t; +typedef struct drm_agp_mode drm_agp_mode_t; +typedef enum drm_ctx_flags drm_ctx_flags_t; +typedef struct drm_ctx drm_ctx_t; +typedef struct drm_ctx_res drm_ctx_res_t; +typedef struct drm_draw drm_draw_t; +typedef struct drm_update_draw drm_update_draw_t; +typedef struct drm_auth drm_auth_t; +typedef struct drm_irq_busid drm_irq_busid_t; +typedef enum drm_vblank_seq_type drm_vblank_seq_type_t; + +typedef struct drm_agp_buffer drm_agp_buffer_t; +typedef struct drm_agp_binding drm_agp_binding_t; +typedef struct drm_agp_info drm_agp_info_t; +typedef struct drm_scatter_gather drm_scatter_gather_t; +typedef struct drm_set_version drm_set_version_t; +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h new file mode 100644 index 000000000000..7ccbd6a2bbe0 --- /dev/null +++ b/tools/include/uapi/drm/i915_drm.h @@ -0,0 +1,1474 @@ +/* + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _UAPI_I915_DRM_H_ +#define _UAPI_I915_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + */ + +/** + * DOC: uevents generated by i915 on it's device node + * + * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch + * event from the gpu l3 cache. Additional information supplied is ROW, + * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep + * track of these events and if a specific cache-line seems to have a + * persistent error remap it with the l3 remapping tool supplied in + * intel-gpu-tools. The value supplied with the event is always 1. + * + * I915_ERROR_UEVENT - Generated upon error detection, currently only via + * hangcheck. The error detection event is a good indicator of when things + * began to go badly. The value supplied with the event is a 1 upon error + * detection, and a 0 upon reset completion, signifying no more error + * exists. NOTE: Disabling hangcheck or reset via module parameter will + * cause the related events to not be seen. + * + * I915_RESET_UEVENT - Event is generated just before an attempt to reset the + * the GPU. The value supplied with the event is always 1. NOTE: Disable + * reset via module parameter will cause this event to not be seen. + */ +#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR" +#define I915_ERROR_UEVENT "ERROR" +#define I915_RESET_UEVENT "RESET" + +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* + * Not cached anywhere, coherency between CPU and GPU accesses is + * guaranteed. + */ + I915_MOCS_UNCACHED, + /* + * Cacheability and coherency controlled by the kernel automatically + * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current + * usage of the surface (used for display scanout or not). + */ + I915_MOCS_PTE, + /* + * Cached in all GPU caches available on the platform. + * Coherency between CPU and GPU accesses to the surface is not + * guaranteed without extra synchronization. + */ + I915_MOCS_CACHED, +}; + +/* Each region is a minimum of 16k, and there are at most 255 of them. + */ +#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use + * of chars for next/prev indices */ +#define I915_LOG_MIN_TEX_REGION_SIZE 14 + +typedef struct _drm_i915_init { + enum { + I915_INIT_DMA = 0x01, + I915_CLEANUP_DMA = 0x02, + I915_RESUME_DMA = 0x03 + } func; + unsigned int mmio_offset; + int sarea_priv_offset; + unsigned int ring_start; + unsigned int ring_end; + unsigned int ring_size; + unsigned int front_offset; + unsigned int back_offset; + unsigned int depth_offset; + unsigned int w; + unsigned int h; + unsigned int pitch; + unsigned int pitch_bits; + unsigned int back_pitch; + unsigned int depth_pitch; + unsigned int cpp; + unsigned int chipset; +} drm_i915_init_t; + +typedef struct _drm_i915_sarea { + struct drm_tex_region texList[I915_NR_TEX_REGIONS + 1]; + int last_upload; /* last time texture was uploaded */ + int last_enqueue; /* last time a buffer was enqueued */ + int last_dispatch; /* age of the most recently dispatched buffer */ + int ctxOwner; /* last context to upload state */ + int texAge; + int pf_enabled; /* is pageflipping allowed? */ + int pf_active; + int pf_current_page; /* which buffer is being displayed? */ + int perf_boxes; /* performance boxes to be displayed */ + int width, height; /* screen size in pixels */ + + drm_handle_t front_handle; + int front_offset; + int front_size; + + drm_handle_t back_handle; + int back_offset; + int back_size; + + drm_handle_t depth_handle; + int depth_offset; + int depth_size; + + drm_handle_t tex_handle; + int tex_offset; + int tex_size; + int log_tex_granularity; + int pitch; + int rotation; /* 0, 90, 180 or 270 */ + int rotated_offset; + int rotated_size; + int rotated_pitch; + int virtualX, virtualY; + + unsigned int front_tiled; + unsigned int back_tiled; + unsigned int depth_tiled; + unsigned int rotated_tiled; + unsigned int rotated2_tiled; + + int pipeA_x; + int pipeA_y; + int pipeA_w; + int pipeA_h; + int pipeB_x; + int pipeB_y; + int pipeB_w; + int pipeB_h; + + /* fill out some space for old userspace triple buffer */ + drm_handle_t unused_handle; + __u32 unused1, unused2, unused3; + + /* buffer object handles for static buffers. May change + * over the lifetime of the client. + */ + __u32 front_bo_handle; + __u32 back_bo_handle; + __u32 unused_bo_handle; + __u32 depth_bo_handle; + +} drm_i915_sarea_t; + +/* due to userspace building against these headers we need some compat here */ +#define planeA_x pipeA_x +#define planeA_y pipeA_y +#define planeA_w pipeA_w +#define planeA_h pipeA_h +#define planeB_x pipeB_x +#define planeB_y pipeB_y +#define planeB_w pipeB_w +#define planeB_h pipeB_h + +/* Flags for perf_boxes + */ +#define I915_BOX_RING_EMPTY 0x1 +#define I915_BOX_FLIP 0x2 +#define I915_BOX_WAIT 0x4 +#define I915_BOX_TEXTURE_LOAD 0x8 +#define I915_BOX_LOST_CONTEXT 0x10 + +/* + * i915 specific ioctls. + * + * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie + * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset + * against DRM_COMMAND_BASE and should be between [0x0, 0x60). + */ +#define DRM_I915_INIT 0x00 +#define DRM_I915_FLUSH 0x01 +#define DRM_I915_FLIP 0x02 +#define DRM_I915_BATCHBUFFER 0x03 +#define DRM_I915_IRQ_EMIT 0x04 +#define DRM_I915_IRQ_WAIT 0x05 +#define DRM_I915_GETPARAM 0x06 +#define DRM_I915_SETPARAM 0x07 +#define DRM_I915_ALLOC 0x08 +#define DRM_I915_FREE 0x09 +#define DRM_I915_INIT_HEAP 0x0a +#define DRM_I915_CMDBUFFER 0x0b +#define DRM_I915_DESTROY_HEAP 0x0c +#define DRM_I915_SET_VBLANK_PIPE 0x0d +#define DRM_I915_GET_VBLANK_PIPE 0x0e +#define DRM_I915_VBLANK_SWAP 0x0f +#define DRM_I915_HWS_ADDR 0x11 +#define DRM_I915_GEM_INIT 0x13 +#define DRM_I915_GEM_EXECBUFFER 0x14 +#define DRM_I915_GEM_PIN 0x15 +#define DRM_I915_GEM_UNPIN 0x16 +#define DRM_I915_GEM_BUSY 0x17 +#define DRM_I915_GEM_THROTTLE 0x18 +#define DRM_I915_GEM_ENTERVT 0x19 +#define DRM_I915_GEM_LEAVEVT 0x1a +#define DRM_I915_GEM_CREATE 0x1b +#define DRM_I915_GEM_PREAD 0x1c +#define DRM_I915_GEM_PWRITE 0x1d +#define DRM_I915_GEM_MMAP 0x1e +#define DRM_I915_GEM_SET_DOMAIN 0x1f +#define DRM_I915_GEM_SW_FINISH 0x20 +#define DRM_I915_GEM_SET_TILING 0x21 +#define DRM_I915_GEM_GET_TILING 0x22 +#define DRM_I915_GEM_GET_APERTURE 0x23 +#define DRM_I915_GEM_MMAP_GTT 0x24 +#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25 +#define DRM_I915_GEM_MADVISE 0x26 +#define DRM_I915_OVERLAY_PUT_IMAGE 0x27 +#define DRM_I915_OVERLAY_ATTRS 0x28 +#define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2 +#define DRM_I915_GET_SPRITE_COLORKEY 0x2a +#define DRM_I915_SET_SPRITE_COLORKEY 0x2b +#define DRM_I915_GEM_WAIT 0x2c +#define DRM_I915_GEM_CONTEXT_CREATE 0x2d +#define DRM_I915_GEM_CONTEXT_DESTROY 0x2e +#define DRM_I915_GEM_SET_CACHING 0x2f +#define DRM_I915_GEM_GET_CACHING 0x30 +#define DRM_I915_REG_READ 0x31 +#define DRM_I915_GET_RESET_STATS 0x32 +#define DRM_I915_GEM_USERPTR 0x33 +#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 +#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 +#define DRM_I915_PERF_OPEN 0x36 + +#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) +#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) +#define DRM_IOCTL_I915_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLIP) +#define DRM_IOCTL_I915_BATCHBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_BATCHBUFFER, drm_i915_batchbuffer_t) +#define DRM_IOCTL_I915_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_IRQ_EMIT, drm_i915_irq_emit_t) +#define DRM_IOCTL_I915_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_IRQ_WAIT, drm_i915_irq_wait_t) +#define DRM_IOCTL_I915_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GETPARAM, drm_i915_getparam_t) +#define DRM_IOCTL_I915_SETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SETPARAM, drm_i915_setparam_t) +#define DRM_IOCTL_I915_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_ALLOC, drm_i915_mem_alloc_t) +#define DRM_IOCTL_I915_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_FREE, drm_i915_mem_free_t) +#define DRM_IOCTL_I915_INIT_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT_HEAP, drm_i915_mem_init_heap_t) +#define DRM_IOCTL_I915_CMDBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_CMDBUFFER, drm_i915_cmdbuffer_t) +#define DRM_IOCTL_I915_DESTROY_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_DESTROY_HEAP, drm_i915_mem_destroy_heap_t) +#define DRM_IOCTL_I915_SET_VBLANK_PIPE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t) +#define DRM_IOCTL_I915_GET_VBLANK_PIPE DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t) +#define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t) +#define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init) +#define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) +#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) +#define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) +#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) +#define DRM_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_CACHING, struct drm_i915_gem_caching) +#define DRM_IOCTL_I915_GEM_GET_CACHING DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHING, struct drm_i915_gem_caching) +#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE) +#define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) +#define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) +#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) +#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) +#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) +#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt) +#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) +#define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish) +#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) +#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) +#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) +#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_i915_get_pipe_from_crtc_id) +#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) +#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image) +#define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs) +#define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) +#define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) +#define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait) +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create) +#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) +#define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) +#define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats) +#define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr) +#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) +#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) +#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) + +/* Allow drivers to submit batchbuffers directly to hardware, relying + * on the security mechanisms provided by hardware. + */ +typedef struct drm_i915_batchbuffer { + int start; /* agp offset */ + int used; /* nr bytes in use */ + int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ + int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */ + int num_cliprects; /* mulitpass with multiple cliprects? */ + struct drm_clip_rect __user *cliprects; /* pointer to userspace cliprects */ +} drm_i915_batchbuffer_t; + +/* As above, but pass a pointer to userspace buffer which can be + * validated by the kernel prior to sending to hardware. + */ +typedef struct _drm_i915_cmdbuffer { + char __user *buf; /* pointer to userspace command buffer */ + int sz; /* nr bytes in buf */ + int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ + int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */ + int num_cliprects; /* mulitpass with multiple cliprects? */ + struct drm_clip_rect __user *cliprects; /* pointer to userspace cliprects */ +} drm_i915_cmdbuffer_t; + +/* Userspace can request & wait on irq's: + */ +typedef struct drm_i915_irq_emit { + int __user *irq_seq; +} drm_i915_irq_emit_t; + +typedef struct drm_i915_irq_wait { + int irq_seq; +} drm_i915_irq_wait_t; + +/* Ioctl to query kernel params: + */ +#define I915_PARAM_IRQ_ACTIVE 1 +#define I915_PARAM_ALLOW_BATCHBUFFER 2 +#define I915_PARAM_LAST_DISPATCH 3 +#define I915_PARAM_CHIPSET_ID 4 +#define I915_PARAM_HAS_GEM 5 +#define I915_PARAM_NUM_FENCES_AVAIL 6 +#define I915_PARAM_HAS_OVERLAY 7 +#define I915_PARAM_HAS_PAGEFLIPPING 8 +#define I915_PARAM_HAS_EXECBUF2 9 +#define I915_PARAM_HAS_BSD 10 +#define I915_PARAM_HAS_BLT 11 +#define I915_PARAM_HAS_RELAXED_FENCING 12 +#define I915_PARAM_HAS_COHERENT_RINGS 13 +#define I915_PARAM_HAS_EXEC_CONSTANTS 14 +#define I915_PARAM_HAS_RELAXED_DELTA 15 +#define I915_PARAM_HAS_GEN7_SOL_RESET 16 +#define I915_PARAM_HAS_LLC 17 +#define I915_PARAM_HAS_ALIASING_PPGTT 18 +#define I915_PARAM_HAS_WAIT_TIMEOUT 19 +#define I915_PARAM_HAS_SEMAPHORES 20 +#define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 +#define I915_PARAM_HAS_VEBOX 22 +#define I915_PARAM_HAS_SECURE_BATCHES 23 +#define I915_PARAM_HAS_PINNED_BATCHES 24 +#define I915_PARAM_HAS_EXEC_NO_RELOC 25 +#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 +#define I915_PARAM_HAS_WT 27 +#define I915_PARAM_CMD_PARSER_VERSION 28 +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 +#define I915_PARAM_MMAP_VERSION 30 +#define I915_PARAM_HAS_BSD2 31 +#define I915_PARAM_REVISION 32 +#define I915_PARAM_SUBSLICE_TOTAL 33 +#define I915_PARAM_EU_TOTAL 34 +#define I915_PARAM_HAS_GPU_RESET 35 +#define I915_PARAM_HAS_RESOURCE_STREAMER 36 +#define I915_PARAM_HAS_EXEC_SOFTPIN 37 +#define I915_PARAM_HAS_POOLED_EU 38 +#define I915_PARAM_MIN_EU_IN_POOL 39 +#define I915_PARAM_MMAP_GTT_VERSION 40 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution + * priorities and the driver will attempt to execute batches in priority order. + */ +#define I915_PARAM_HAS_SCHEDULER 41 +#define I915_PARAM_HUC_STATUS 42 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of + * synchronisation with implicit fencing on individual objects. + * See EXEC_OBJECT_ASYNC. + */ +#define I915_PARAM_HAS_EXEC_ASYNC 43 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support - + * both being able to pass in a sync_file fd to wait upon before executing, + * and being able to return a new sync_file fd that is signaled when the + * current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT. + */ +#define I915_PARAM_HAS_EXEC_FENCE 44 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture + * user specified bufffers for post-mortem debugging of GPU hangs. See + * EXEC_OBJECT_CAPTURE. + */ +#define I915_PARAM_HAS_EXEC_CAPTURE 45 + +#define I915_PARAM_SLICE_MASK 46 + +/* Assuming it's uniform for each slice, this queries the mask of subslices + * per-slice for this system. + */ +#define I915_PARAM_SUBSLICE_MASK 47 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer + * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST. + */ +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 + +typedef struct drm_i915_getparam { + __s32 param; + /* + * WARNING: Using pointers instead of fixed-size u64 means we need to write + * compat32 code. Don't repeat this mistake. + */ + int __user *value; +} drm_i915_getparam_t; + +/* Ioctl to set kernel params: + */ +#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1 +#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 +#define I915_SETPARAM_ALLOW_BATCHBUFFER 3 +#define I915_SETPARAM_NUM_USED_FENCES 4 + +typedef struct drm_i915_setparam { + int param; + int value; +} drm_i915_setparam_t; + +/* A memory manager for regions of shared memory: + */ +#define I915_MEM_REGION_AGP 1 + +typedef struct drm_i915_mem_alloc { + int region; + int alignment; + int size; + int __user *region_offset; /* offset from start of fb or agp */ +} drm_i915_mem_alloc_t; + +typedef struct drm_i915_mem_free { + int region; + int region_offset; +} drm_i915_mem_free_t; + +typedef struct drm_i915_mem_init_heap { + int region; + int size; + int start; +} drm_i915_mem_init_heap_t; + +/* Allow memory manager to be torn down and re-initialized (eg on + * rotate): + */ +typedef struct drm_i915_mem_destroy_heap { + int region; +} drm_i915_mem_destroy_heap_t; + +/* Allow X server to configure which pipes to monitor for vblank signals + */ +#define DRM_I915_VBLANK_PIPE_A 1 +#define DRM_I915_VBLANK_PIPE_B 2 + +typedef struct drm_i915_vblank_pipe { + int pipe; +} drm_i915_vblank_pipe_t; + +/* Schedule buffer swap at given vertical blank: + */ +typedef struct drm_i915_vblank_swap { + drm_drawable_t drawable; + enum drm_vblank_seq_type seqtype; + unsigned int sequence; +} drm_i915_vblank_swap_t; + +typedef struct drm_i915_hws_addr { + __u64 addr; +} drm_i915_hws_addr_t; + +struct drm_i915_gem_init { + /** + * Beginning offset in the GTT to be managed by the DRM memory + * manager. + */ + __u64 gtt_start; + /** + * Ending offset in the GTT to be managed by the DRM memory + * manager. + */ + __u64 gtt_end; +}; + +struct drm_i915_gem_create { + /** + * Requested size for the object. + * + * The (page-aligned) allocated size for the object will be returned. + */ + __u64 size; + /** + * Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + __u32 pad; +}; + +struct drm_i915_gem_pread { + /** Handle for the object being read. */ + __u32 handle; + __u32 pad; + /** Offset into the object to read from */ + __u64 offset; + /** Length of data to read */ + __u64 size; + /** + * Pointer to write the data into. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 data_ptr; +}; + +struct drm_i915_gem_pwrite { + /** Handle for the object being written to. */ + __u32 handle; + __u32 pad; + /** Offset into the object to write to */ + __u64 offset; + /** Length of data to write */ + __u64 size; + /** + * Pointer to read the data from. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 data_ptr; +}; + +struct drm_i915_gem_mmap { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 pad; + /** Offset in the object to map. */ + __u64 offset; + /** + * Length of data to map. + * + * The value will be page-aligned. + */ + __u64 size; + /** + * Returned pointer the data was mapped at. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 addr_ptr; + + /** + * Flags for extended behaviour. + * + * Added in version 2. + */ + __u64 flags; +#define I915_MMAP_WC 0x1 +}; + +struct drm_i915_gem_mmap_gtt { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 pad; + /** + * Fake offset to use for subsequent mmap call + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 offset; +}; + +struct drm_i915_gem_set_domain { + /** Handle for the object */ + __u32 handle; + + /** New read domains */ + __u32 read_domains; + + /** New write domain */ + __u32 write_domain; +}; + +struct drm_i915_gem_sw_finish { + /** Handle for the object */ + __u32 handle; +}; + +struct drm_i915_gem_relocation_entry { + /** + * Handle of the buffer being pointed to by this relocation entry. + * + * It's appealing to make this be an index into the mm_validate_entry + * list to refer to the buffer, but this allows the driver to create + * a relocation list for state buffers and not re-write it per + * exec using the buffer. + */ + __u32 target_handle; + + /** + * Value to be added to the offset of the target buffer to make up + * the relocation entry. + */ + __u32 delta; + + /** Offset in the buffer the relocation entry will be written into */ + __u64 offset; + + /** + * Offset value of the target buffer that the relocation entry was last + * written as. + * + * If the buffer has the same offset as last time, we can skip syncing + * and writing the relocation. This value is written back out by + * the execbuffer ioctl when the relocation is written. + */ + __u64 presumed_offset; + + /** + * Target memory domains read by this operation. + */ + __u32 read_domains; + + /** + * Target memory domains written by this operation. + * + * Note that only one domain may be written by the whole + * execbuffer operation, so that where there are conflicts, + * the application will get -EINVAL back. + */ + __u32 write_domain; +}; + +/** @{ + * Intel memory domains + * + * Most of these just align with the various caches in + * the system and are used to flush and invalidate as + * objects end up cached in different domains. + */ +/** CPU cache */ +#define I915_GEM_DOMAIN_CPU 0x00000001 +/** Render cache, used by 2D and 3D drawing */ +#define I915_GEM_DOMAIN_RENDER 0x00000002 +/** Sampler cache, used by texture engine */ +#define I915_GEM_DOMAIN_SAMPLER 0x00000004 +/** Command queue, used to load batch buffers */ +#define I915_GEM_DOMAIN_COMMAND 0x00000008 +/** Instruction cache, used by shader programs */ +#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010 +/** Vertex address cache */ +#define I915_GEM_DOMAIN_VERTEX 0x00000020 +/** GTT domain - aperture and scanout */ +#define I915_GEM_DOMAIN_GTT 0x00000040 +/** WC domain - uncached access */ +#define I915_GEM_DOMAIN_WC 0x00000080 +/** @} */ + +struct drm_i915_gem_exec_object { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + __u32 handle; + + /** Number of relocations to be performed on this buffer */ + __u32 relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + __u64 relocs_ptr; + + /** Required alignment in graphics aperture */ + __u64 alignment; + + /** + * Returned value of the updated offset of the object, for future + * presumed_offset writes. + */ + __u64 offset; +}; + +struct drm_i915_gem_execbuffer { + /** + * List of buffers to be validated with their relocations to be + * performend on them. + * + * This is a pointer to an array of struct drm_i915_gem_validate_entry. + * + * These buffers must be listed in an order such that all relocations + * a buffer is performing refer to buffers that have already appeared + * in the validate list. + */ + __u64 buffers_ptr; + __u32 buffer_count; + + /** Offset in the batchbuffer to start execution from. */ + __u32 batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + __u32 batch_len; + __u32 DR1; + __u32 DR4; + __u32 num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ + __u64 cliprects_ptr; +}; + +struct drm_i915_gem_exec_object2 { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + __u32 handle; + + /** Number of relocations to be performed on this buffer */ + __u32 relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + __u64 relocs_ptr; + + /** Required alignment in graphics aperture */ + __u64 alignment; + + /** + * When the EXEC_OBJECT_PINNED flag is specified this is populated by + * the user with the GTT offset at which this object will be pinned. + * When the I915_EXEC_NO_RELOC flag is specified this must contain the + * presumed_offset of the object. + * During execbuffer2 the kernel populates it with the value of the + * current GTT offset of the object, for future presumed_offset writes. + */ + __u64 offset; + +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) +#define EXEC_OBJECT_PINNED (1<<4) +#define EXEC_OBJECT_PAD_TO_SIZE (1<<5) +/* The kernel implicitly tracks GPU activity on all GEM objects, and + * synchronises operations with outstanding rendering. This includes + * rendering on other devices if exported via dma-buf. However, sometimes + * this tracking is too coarse and the user knows better. For example, + * if the object is split into non-overlapping ranges shared between different + * clients or engines (i.e. suballocating objects), the implicit tracking + * by kernel assumes that each operation affects the whole object rather + * than an individual range, causing needless synchronisation between clients. + * The kernel will also forgo any CPU cache flushes prior to rendering from + * the object as the client is expected to be also handling such domain + * tracking. + * + * The kernel maintains the implicit tracking in order to manage resources + * used by the GPU - this flag only disables the synchronisation prior to + * rendering with this object in this execbuf. + * + * Opting out of implicit synhronisation requires the user to do its own + * explicit tracking to avoid rendering corruption. See, for example, + * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. + */ +#define EXEC_OBJECT_ASYNC (1<<6) +/* Request that the contents of this execobject be copied into the error + * state upon a GPU hang involving this batch for post-mortem debugging. + * These buffers are recorded in no particular order as "user" in + * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see + * if the kernel supports this flag. + */ +#define EXEC_OBJECT_CAPTURE (1<<7) +/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1) + __u64 flags; + + union { + __u64 rsvd1; + __u64 pad_to_size; + }; + __u64 rsvd2; +}; + +struct drm_i915_gem_execbuffer2 { + /** + * List of gem_exec_object2 structs + */ + __u64 buffers_ptr; + __u32 buffer_count; + + /** Offset in the batchbuffer to start execution from. */ + __u32 batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + __u32 batch_len; + __u32 DR1; + __u32 DR4; + __u32 num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ + __u64 cliprects_ptr; +#define I915_EXEC_RING_MASK (7<<0) +#define I915_EXEC_DEFAULT (0<<0) +#define I915_EXEC_RENDER (1<<0) +#define I915_EXEC_BSD (2<<0) +#define I915_EXEC_BLT (3<<0) +#define I915_EXEC_VEBOX (4<<0) + +/* Used for switching the constants addressing mode on gen4+ RENDER ring. + * Gen6+ only supports relative addressing to dynamic state (default) and + * absolute addressing. + * + * These flags are ignored for the BSD and BLT rings. + */ +#define I915_EXEC_CONSTANTS_MASK (3<<6) +#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */ +#define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6) +#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */ + __u64 flags; + __u64 rsvd1; /* now used for context info */ + __u64 rsvd2; +}; + +/** Resets the SO write offset registers for transform feedback on gen7. */ +#define I915_EXEC_GEN7_SOL_RESET (1<<8) + +/** Request a privileged ("secure") batch buffer. Note only available for + * DRM_ROOT_ONLY | DRM_MASTER processes. + */ +#define I915_EXEC_SECURE (1<<9) + +/** Inform the kernel that the batch is and will always be pinned. This + * negates the requirement for a workaround to be performed to avoid + * an incoherent CS (such as can be found on 830/845). If this flag is + * not passed, the kernel will endeavour to make sure the batch is + * coherent with the CS before execution. If this flag is passed, + * userspace assumes the responsibility for ensuring the same. + */ +#define I915_EXEC_IS_PINNED (1<<10) + +/** Provide a hint to the kernel that the command stream and auxiliary + * state buffers already holds the correct presumed addresses and so the + * relocation process may be skipped if no buffers need to be moved in + * preparation for the execbuffer. + */ +#define I915_EXEC_NO_RELOC (1<<11) + +/** Use the reloc.handle as an index into the exec object array rather + * than as the per-file handle. + */ +#define I915_EXEC_HANDLE_LUT (1<<12) + +/** Used for switching BSD rings on the platforms with two BSD rings */ +#define I915_EXEC_BSD_SHIFT (13) +#define I915_EXEC_BSD_MASK (3 << I915_EXEC_BSD_SHIFT) +/* default ping-pong mode */ +#define I915_EXEC_BSD_DEFAULT (0 << I915_EXEC_BSD_SHIFT) +#define I915_EXEC_BSD_RING1 (1 << I915_EXEC_BSD_SHIFT) +#define I915_EXEC_BSD_RING2 (2 << I915_EXEC_BSD_SHIFT) + +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<15) + +/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_IN (1<<16) + +/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd + * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given + * to the caller, and it should be close() after use. (The fd is a regular + * file descriptor and will be cleaned up on process termination. It holds + * a reference to the request, but nothing else.) + * + * The sync_file fd can be combined with other sync_file and passed either + * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip + * will only occur after this request completes), or to other devices. + * + * Using I915_EXEC_FENCE_OUT requires use of + * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written + * back to userspace. Failure to do so will cause the out-fence to always + * be reported as zero, and the real fence fd to be leaked. + */ +#define I915_EXEC_FENCE_OUT (1<<17) + +/* + * Traditionally the execbuf ioctl has only considered the final element in + * the execobject[] to be the executable batch. Often though, the client + * will known the batch object prior to construction and being able to place + * it into the execobject[] array first can simplify the relocation tracking. + * Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the + * execobject[] as the * batch instead (the default is to use the last + * element). + */ +#define I915_EXEC_BATCH_FIRST (1<<18) +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1)) + +#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) +#define i915_execbuffer2_set_context_id(eb2, context) \ + (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK +#define i915_execbuffer2_get_context_id(eb2) \ + ((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK) + +struct drm_i915_gem_pin { + /** Handle of the buffer to be pinned. */ + __u32 handle; + __u32 pad; + + /** alignment required within the aperture */ + __u64 alignment; + + /** Returned GTT offset of the buffer. */ + __u64 offset; +}; + +struct drm_i915_gem_unpin { + /** Handle of the buffer to be unpinned. */ + __u32 handle; + __u32 pad; +}; + +struct drm_i915_gem_busy { + /** Handle of the buffer to check for busy */ + __u32 handle; + + /** Return busy status + * + * A return of 0 implies that the object is idle (after + * having flushed any pending activity), and a non-zero return that + * the object is still in-flight on the GPU. (The GPU has not yet + * signaled completion for all pending requests that reference the + * object.) An object is guaranteed to become idle eventually (so + * long as no new GPU commands are executed upon it). Due to the + * asynchronous nature of the hardware, an object reported + * as busy may become idle before the ioctl is completed. + * + * Furthermore, if the object is busy, which engine is busy is only + * provided as a guide. There are race conditions which prevent the + * report of which engines are busy from being always accurate. + * However, the converse is not true. If the object is idle, the + * result of the ioctl, that all engines are idle, is accurate. + * + * The returned dword is split into two fields to indicate both + * the engines on which the object is being read, and the + * engine on which it is currently being written (if any). + * + * The low word (bits 0:15) indicate if the object is being written + * to by any engine (there can only be one, as the GEM implicit + * synchronisation rules force writes to be serialised). Only the + * engine for the last write is reported. + * + * The high word (bits 16:31) are a bitmask of which engines are + * currently reading from the object. Multiple engines may be + * reading from the object simultaneously. + * + * The value of each engine is the same as specified in the + * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc. + * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to + * the I915_EXEC_RENDER engine for execution, and so it is never + * reported as active itself. Some hardware may have parallel + * execution engines, e.g. multiple media engines, which are + * mapped to the same identifier in the EXECBUFFER2 ioctl and + * so are not separately reported for busyness. + * + * Caveat emptor: + * Only the boolean result of this query is reliable; that is whether + * the object is idle or busy. The report of which engines are busy + * should be only used as a heuristic. + */ + __u32 busy; +}; + +/** + * I915_CACHING_NONE + * + * GPU access is not coherent with cpu caches. Default for machines without an + * LLC. + */ +#define I915_CACHING_NONE 0 +/** + * I915_CACHING_CACHED + * + * GPU access is coherent with cpu caches and furthermore the data is cached in + * last-level caches shared between cpu cores and the gpu GT. Default on + * machines with HAS_LLC. + */ +#define I915_CACHING_CACHED 1 +/** + * I915_CACHING_DISPLAY + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no special + * cache mode (like write-through or gfdt flushing) is available. The kernel + * automatically sets this mode when using a buffer as a scanout target. + * Userspace can manually set this mode to avoid a costly stall and clflush in + * the hotpath of drawing the first frame. + */ +#define I915_CACHING_DISPLAY 2 + +struct drm_i915_gem_caching { + /** + * Handle of the buffer to set/get the caching level of. */ + __u32 handle; + + /** + * Cacheing level to apply or return value + * + * bits0-15 are for generic caching control (i.e. the above defined + * values). bits16-31 are reserved for platform-specific variations + * (e.g. l3$ caching on gen7). */ + __u32 caching; +}; + +#define I915_TILING_NONE 0 +#define I915_TILING_X 1 +#define I915_TILING_Y 2 +#define I915_TILING_LAST I915_TILING_Y + +#define I915_BIT_6_SWIZZLE_NONE 0 +#define I915_BIT_6_SWIZZLE_9 1 +#define I915_BIT_6_SWIZZLE_9_10 2 +#define I915_BIT_6_SWIZZLE_9_11 3 +#define I915_BIT_6_SWIZZLE_9_10_11 4 +/* Not seen by userland */ +#define I915_BIT_6_SWIZZLE_UNKNOWN 5 +/* Seen by userland. */ +#define I915_BIT_6_SWIZZLE_9_17 6 +#define I915_BIT_6_SWIZZLE_9_10_17 7 + +struct drm_i915_gem_set_tiling { + /** Handle of the buffer to have its tiling state updated */ + __u32 handle; + + /** + * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). + * + * This value is to be set on request, and will be updated by the + * kernel on successful return with the actual chosen tiling layout. + * + * The tiling mode may be demoted to I915_TILING_NONE when the system + * has bit 6 swizzling that can't be managed correctly by GEM. + * + * Buffer contents become undefined when changing tiling_mode. + */ + __u32 tiling_mode; + + /** + * Stride in bytes for the object when in I915_TILING_X or + * I915_TILING_Y. + */ + __u32 stride; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ + __u32 swizzle_mode; +}; + +struct drm_i915_gem_get_tiling { + /** Handle of the buffer to get tiling state for. */ + __u32 handle; + + /** + * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). + */ + __u32 tiling_mode; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ + __u32 swizzle_mode; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping whilst bound. + */ + __u32 phys_swizzle_mode; +}; + +struct drm_i915_gem_get_aperture { + /** Total size of the aperture used by i915_gem_execbuffer, in bytes */ + __u64 aper_size; + + /** + * Available space in the aperture used by i915_gem_execbuffer, in + * bytes + */ + __u64 aper_available_size; +}; + +struct drm_i915_get_pipe_from_crtc_id { + /** ID of CRTC being requested **/ + __u32 crtc_id; + + /** pipe of requested CRTC **/ + __u32 pipe; +}; + +#define I915_MADV_WILLNEED 0 +#define I915_MADV_DONTNEED 1 +#define __I915_MADV_PURGED 2 /* internal state */ + +struct drm_i915_gem_madvise { + /** Handle of the buffer to change the backing store advice */ + __u32 handle; + + /* Advice: either the buffer will be needed again in the near future, + * or wont be and could be discarded under memory pressure. + */ + __u32 madv; + + /** Whether the backing store still exists. */ + __u32 retained; +}; + +/* flags */ +#define I915_OVERLAY_TYPE_MASK 0xff +#define I915_OVERLAY_YUV_PLANAR 0x01 +#define I915_OVERLAY_YUV_PACKED 0x02 +#define I915_OVERLAY_RGB 0x03 + +#define I915_OVERLAY_DEPTH_MASK 0xff00 +#define I915_OVERLAY_RGB24 0x1000 +#define I915_OVERLAY_RGB16 0x2000 +#define I915_OVERLAY_RGB15 0x3000 +#define I915_OVERLAY_YUV422 0x0100 +#define I915_OVERLAY_YUV411 0x0200 +#define I915_OVERLAY_YUV420 0x0300 +#define I915_OVERLAY_YUV410 0x0400 + +#define I915_OVERLAY_SWAP_MASK 0xff0000 +#define I915_OVERLAY_NO_SWAP 0x000000 +#define I915_OVERLAY_UV_SWAP 0x010000 +#define I915_OVERLAY_Y_SWAP 0x020000 +#define I915_OVERLAY_Y_AND_UV_SWAP 0x030000 + +#define I915_OVERLAY_FLAGS_MASK 0xff000000 +#define I915_OVERLAY_ENABLE 0x01000000 + +struct drm_intel_overlay_put_image { + /* various flags and src format description */ + __u32 flags; + /* source picture description */ + __u32 bo_handle; + /* stride values and offsets are in bytes, buffer relative */ + __u16 stride_Y; /* stride for packed formats */ + __u16 stride_UV; + __u32 offset_Y; /* offset for packet formats */ + __u32 offset_U; + __u32 offset_V; + /* in pixels */ + __u16 src_width; + __u16 src_height; + /* to compensate the scaling factors for partially covered surfaces */ + __u16 src_scan_width; + __u16 src_scan_height; + /* output crtc description */ + __u32 crtc_id; + __u16 dst_x; + __u16 dst_y; + __u16 dst_width; + __u16 dst_height; +}; + +/* flags */ +#define I915_OVERLAY_UPDATE_ATTRS (1<<0) +#define I915_OVERLAY_UPDATE_GAMMA (1<<1) +#define I915_OVERLAY_DISABLE_DEST_COLORKEY (1<<2) +struct drm_intel_overlay_attrs { + __u32 flags; + __u32 color_key; + __s32 brightness; + __u32 contrast; + __u32 saturation; + __u32 gamma0; + __u32 gamma1; + __u32 gamma2; + __u32 gamma3; + __u32 gamma4; + __u32 gamma5; +}; + +/* + * Intel sprite handling + * + * Color keying works with a min/mask/max tuple. Both source and destination + * color keying is allowed. + * + * Source keying: + * Sprite pixels within the min & max values, masked against the color channels + * specified in the mask field, will be transparent. All other pixels will + * be displayed on top of the primary plane. For RGB surfaces, only the min + * and mask fields will be used; ranged compares are not allowed. + * + * Destination keying: + * Primary plane pixels that match the min value, masked against the color + * channels specified in the mask field, will be replaced by corresponding + * pixels from the sprite plane. + * + * Note that source & destination keying are exclusive; only one can be + * active on a given plane. + */ + +#define I915_SET_COLORKEY_NONE (1<<0) /* disable color key matching */ +#define I915_SET_COLORKEY_DESTINATION (1<<1) +#define I915_SET_COLORKEY_SOURCE (1<<2) +struct drm_intel_sprite_colorkey { + __u32 plane_id; + __u32 min_value; + __u32 channel_mask; + __u32 max_value; + __u32 flags; +}; + +struct drm_i915_gem_wait { + /** Handle of BO we shall wait on */ + __u32 bo_handle; + __u32 flags; + /** Number of nanoseconds to wait, Returns time remaining. */ + __s64 timeout_ns; +}; + +struct drm_i915_gem_context_create { + /* output: id of new context*/ + __u32 ctx_id; + __u32 pad; +}; + +struct drm_i915_gem_context_destroy { + __u32 ctx_id; + __u32 pad; +}; + +struct drm_i915_reg_read { + /* + * Register offset. + * For 64bit wide registers where the upper 32bits don't immediately + * follow the lower 32bits, the offset of the lower 32bits must + * be specified + */ + __u64 offset; + __u64 val; /* Return value */ +}; +/* Known registers: + * + * Render engine timestamp - 0x2358 + 64bit - gen7+ + * - Note this register returns an invalid value if using the default + * single instruction 8byte read, in order to workaround that use + * offset (0x2538 | 1) instead. + * + */ + +struct drm_i915_reset_stats { + __u32 ctx_id; + __u32 flags; + + /* All resets since boot/module reload, for all contexts */ + __u32 reset_count; + + /* Number of batches lost when active in GPU, for this context */ + __u32 batch_active; + + /* Number of batches lost pending for execution, for this context */ + __u32 batch_pending; + + __u32 pad; +}; + +struct drm_i915_gem_userptr { + __u64 user_ptr; + __u64 user_size; + __u32 flags; +#define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 + /** + * Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; +}; + +struct drm_i915_gem_context_param { + __u32 ctx_id; + __u32 size; + __u64 param; +#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 +#define I915_CONTEXT_PARAM_GTT_SIZE 0x3 +#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 +#define I915_CONTEXT_PARAM_BANNABLE 0x5 + __u64 value; +}; + +enum drm_i915_oa_format { + I915_OA_FORMAT_A13 = 1, /* HSW only */ + I915_OA_FORMAT_A29, /* HSW only */ + I915_OA_FORMAT_A13_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8, /* HSW only */ + I915_OA_FORMAT_A45_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8_A16, /* HSW only */ + I915_OA_FORMAT_C4_B8, /* HSW+ */ + + /* Gen8+ */ + I915_OA_FORMAT_A12, + I915_OA_FORMAT_A12_B8_C8, + I915_OA_FORMAT_A32u40_A4u32_B8_C8, + + I915_OA_FORMAT_MAX /* non-ABI */ +}; + +enum drm_i915_perf_property_id { + /** + * Open the stream for a specific context handle (as used with + * execbuffer2). A stream opened for a specific context this way + * won't typically require root privileges. + */ + DRM_I915_PERF_PROP_CTX_HANDLE = 1, + + /** + * A value of 1 requests the inclusion of raw OA unit reports as + * part of stream samples. + */ + DRM_I915_PERF_PROP_SAMPLE_OA, + + /** + * The value specifies which set of OA unit metrics should be + * be configured, defining the contents of any OA unit reports. + */ + DRM_I915_PERF_PROP_OA_METRICS_SET, + + /** + * The value specifies the size and layout of OA unit reports. + */ + DRM_I915_PERF_PROP_OA_FORMAT, + + /** + * Specifying this property implicitly requests periodic OA unit + * sampling and (at least on Haswell) the sampling frequency is derived + * from this exponent as follows: + * + * 80ns * 2^(period_exponent + 1) + */ + DRM_I915_PERF_PROP_OA_EXPONENT, + + DRM_I915_PERF_PROP_MAX /* non-ABI */ +}; + +struct drm_i915_perf_open_param { + __u32 flags; +#define I915_PERF_FLAG_FD_CLOEXEC (1<<0) +#define I915_PERF_FLAG_FD_NONBLOCK (1<<1) +#define I915_PERF_FLAG_DISABLED (1<<2) + + /** The number of u64 (id, value) pairs */ + __u32 num_properties; + + /** + * Pointer to array of u64 (id, value) pairs configuring the stream + * to open. + */ + __u64 properties_ptr; +}; + +/** + * Enable data capture for a stream that was either opened in a disabled state + * via I915_PERF_FLAG_DISABLED or was later disabled via + * I915_PERF_IOCTL_DISABLE. + * + * It is intended to be cheaper to disable and enable a stream than it may be + * to close and re-open a stream with the same configuration. + * + * It's undefined whether any pending data for the stream will be lost. + */ +#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) + +/** + * Disable data capture for a stream. + * + * It is an error to try and read a stream that is disabled. + */ +#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) + +/** + * Common to all i915 perf records + */ +struct drm_i915_perf_record_header { + __u32 type; + __u16 pad; + __u16 size; +}; + +enum drm_i915_perf_record_type { + + /** + * Samples are the work horse record type whose contents are extensible + * and defined when opening an i915 perf stream based on the given + * properties. + * + * Boolean properties following the naming convention + * DRM_I915_PERF_SAMPLE_xyz_PROP request the inclusion of 'xyz' data in + * every sample. + * + * The order of these sample properties given by userspace has no + * affect on the ordering of data within a sample. The order is + * documented here. + * + * struct { + * struct drm_i915_perf_record_header header; + * + * { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA + * }; + */ + DRM_I915_PERF_RECORD_SAMPLE = 1, + + /* + * Indicates that one or more OA reports were not written by the + * hardware. This can happen for example if an MI_REPORT_PERF_COUNT + * command collides with periodic sampling - which would be more likely + * at higher sampling frequencies. + */ + DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2, + + /** + * An error occurred that resulted in all pending OA reports being lost. + */ + DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3, + + DRM_I915_PERF_RECORD_MAX /* non-ABI */ +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _UAPI_I915_DRM_H_ */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 2ee433c52130..eb4ccd73e826 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -74,6 +74,8 @@ tools/include/uapi/asm-generic/fcntl.h tools/include/uapi/asm-generic/ioctls.h tools/include/uapi/asm-generic/mman-common.h tools/include/uapi/asm-generic/mman.h +tools/include/uapi/drm/drm.h +tools/include/uapi/drm/i915_drm.h tools/include/uapi/linux/bpf.h tools/include/uapi/linux/bpf_common.h tools/include/uapi/linux/fcntl.h diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 3f0c6577f8cc..0024525376bf 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -1,6 +1,8 @@ #!/bin/sh HEADERS=' +include/uapi/drm/drm.h +include/uapi/drm/i915_drm.h include/uapi/linux/fcntl.h include/uapi/linux/perf_event.h include/uapi/linux/sched.h From ef9811f0933967c4ee4cf0e1a88678d2607dca07 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 16:47:37 -0300 Subject: [PATCH 151/253] perf trace beauty ioctl: Beautify DRM ioctl cmds This time we try a new approach, using uapi/drm/ copies of drm.h and i915_drm.h we auto generate the string tables, then include it in the ioctl cmd beautifier. This way either the DRM developers will add the new commands to the tools/ copy, like is happening with other areas of tools/include/ (bpf.h comes to mind), or we'll be notified when building perf that our copy drifted. Either way the time from a new command being added to when 'perf trace' gets to know it is greatly shortened, for instance: # strace -p 22401 -e ioctl ioctl(8, DRM_IOCTL_I915_GEM_BUSY, 0x7ffc934f7600) = 0 ioctl(8, DRM_IOCTL_I915_GEM_SET_DOMAIN, 0x7ffc934f7550) = 0 ioctl(8, DRM_IOCTL_I915_GEM_SW_FINISH, 0x7ffc934f76e0) = 0 ioctl(8, DRM_IOCTL_I915_GEM_SW_FINISH, 0x7ffc934f7780) = 0 ioctl(8, _IOC(_IOC_READ|_IOC_WRITE, 0x64, 0x69, 0x40), 0x7ffc934f7700) = 0 ioctl(8, DRM_IOCTL_I915_GEM_SET_DOMAIN, 0x7ffc934f7780) = 0 ioctl(8, DRM_IOCTL_I915_GEM_MADVISE, 0x7ffc934f76f0) = 0 ioctl(8, DRM_IOCTL_I915_GEM_BUSY, 0x7ffc934f76c0) = 0 ioctl(8, DRM_IOCTL_I915_GEM_MADVISE, 0x7ffc934f76b0) = 0 ioctl(8, DRM_IOCTL_I915_GEM_SET_DOMAIN, 0x7ffc934f76d0) = 0 ioctl(8, DRM_IOCTL_MODE_ADDFB, 0x7ffc934f7880) = 0 ioctl(8, DRM_IOCTL_MODE_PAGE_FLIP, 0x7ffc934f77d0) = 0 ^Cstrace: Process 22401 detached versus: # perf trace -p 22401 -e ioctl 1010.856 (0.006 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_I915_GEM_BUSY, arg: 0x7ffc934f7600) = 0 1010.865 (0.003 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_I915_GEM_SET_DOMAIN, arg: 0x7ffc934f7550) = 0 1010.872 (0.002 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_I915_GEM_SW_FINISH, arg: 0x7ffc934f76e0) = 0 1010.939 (0.015 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_I915_GEM_SW_FINISH, arg: 0x7ffc934f7780) = 0 1010.959 (0.085 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_I915_GEM_EXECBUFFER2, arg: 0x7ffc934f7700) = 0 1011.048 (0.003 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_I915_GEM_SET_DOMAIN, arg: 0x7ffc934f7780) = 0 1011.056 (0.002 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_I915_GEM_MADVISE, arg: 0x7ffc934f76f0) = 0 1011.060 (0.002 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_I915_GEM_BUSY, arg: 0x7ffc934f76c0) = 0 1011.064 (0.003 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_I915_GEM_MADVISE, arg: 0x7ffc934f76b0) = 0 1011.068 (0.002 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_I915_GEM_SET_DOMAIN, arg: 0x7ffc934f76d0) = 0 1011.074 (0.009 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_MODE_ADDFB, arg: 0x7ffc934f7880 ) = 0 1011.096 (0.072 ms): gnome-shell/22401 ioctl(fd: 8, cmd: DRM_MODE_PAGE_FLIP, arg: 0x7ffc934f77d0) = 0 ^C[root@jouet linux]# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-mly2d7v9kf28rso81dijbixq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 16 ++++++++++++++-- tools/perf/trace/beauty/drm_ioctl.sh | 13 +++++++++++++ tools/perf/trace/beauty/ioctl.c | 14 +++++++++++++- 3 files changed, 40 insertions(+), 3 deletions(-) create mode 100755 tools/perf/trace/beauty/drm_ioctl.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d66f90e6be5c..78654995b99f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -381,6 +381,17 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) +drm_ioctl_outdir := $(OUTPUT)trace/beauty/generated/ioctl +drm_ioctl_array := $(drm_ioctl_outdir)/drm_ioctl_array.c +drm_hdr_dir := $(srctree)/tools/include/uapi/drm +drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh + +# Create output directory if not already present +_dummy := $(shell [ -d '$(drm_ioctl_outdir)' ] || mkdir -p '$(drm_ioctl_outdir)') + +$(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl) + $(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -475,7 +486,7 @@ endif __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) -prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders +prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -740,7 +751,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ $(OUTPUT)util/intel-pt-decoder/inat-tables.c \ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ - $(OUTPUT)pmu-events/pmu-events.c + $(OUTPUT)pmu-events/pmu-events.c \ + $(OUTPUT)$(drm_ioctl_array) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/trace/beauty/drm_ioctl.sh b/tools/perf/trace/beauty/drm_ioctl.sh new file mode 100755 index 000000000000..2149d3a98e42 --- /dev/null +++ b/tools/perf/trace/beauty/drm_ioctl.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +drm_header_dir=$1 +printf "#ifndef DRM_COMMAND_BASE\n" +grep "#define DRM_COMMAND_BASE" $drm_header_dir/drm.h +printf "#endif\n" + +printf "static const char *drm_ioctl_cmds[] = {\n" +grep "^#define DRM_IOCTL.*DRM_IO" $drm_header_dir/drm.h | \ + sed -r 's/^#define +DRM_IOCTL_([A-Z0-9_]+)[ ]+DRM_IO[A-Z]* *\( *(0x[[:xdigit:]]+),*.*/ [\2] = "\1",/g' +grep "^#define DRM_I915_[A-Z_0-9]\+[ ]\+0x" $drm_header_dir/i915_drm.h | \ + sed -r 's/^#define +DRM_I915_([A-Z0-9_]+)[ ]+(0x[[:xdigit:]]+)/\t[DRM_COMMAND_BASE + \2] = "I915_\1",/g' +printf "};\n" diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 5f0dba8aaa88..732489d141c4 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -44,6 +44,17 @@ static size_t ioctl__scnprintf_tty_cmd(int nr, char *bf, size_t size) return scnprintf(bf, size, "(%#x, %#x)", 'T', nr); } +static size_t ioctl__scnprintf_drm_cmd(int nr, char *bf, size_t size) +{ +#include "trace/beauty/generated/ioctl/drm_ioctl_array.c" + static DEFINE_STRARRAY(drm_ioctl_cmds); + + if (nr < strarray__drm_ioctl_cmds.nr_entries && strarray__drm_ioctl_cmds.entries[nr] != NULL) + return scnprintf(bf, size, "DRM_%s", strarray__drm_ioctl_cmds.entries[nr]); + + return scnprintf(bf, size, "(%#x, %#x)", 'd', nr); +} + static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) { int dir = _IOC_DIR(cmd), @@ -55,7 +66,8 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) int type; size_t (*scnprintf)(int nr, char *bf, size_t size); } ioctl_types[] = { /* Must be ordered by type */ - { .type = 'T', .scnprintf = ioctl__scnprintf_tty_cmd, } + { .type = 'T', .scnprintf = ioctl__scnprintf_tty_cmd, }, + ['d' - 'T'] = { .type = 'd', .scnprintf = ioctl__scnprintf_drm_cmd, } }; const int nr_types = ARRAY_SIZE(ioctl_types); From a215684e10484149313068e0a9ea1978ee36d4f7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 16:45:32 -0300 Subject: [PATCH 152/253] tools include uapi: Grab a copy of sound/asound.h We will use it to generate tables for beautifying ioctl's 'cmd' arg. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wit4wwmrh9d37dtgtk0glbbj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/sound/asound.h | 1026 +++++++++++++++++++++++++++++ tools/perf/MANIFEST | 1 + tools/perf/check-headers.sh | 1 + 3 files changed, 1028 insertions(+) create mode 100644 tools/include/uapi/sound/asound.h diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h new file mode 100644 index 000000000000..87bf30b182df --- /dev/null +++ b/tools/include/uapi/sound/asound.h @@ -0,0 +1,1026 @@ +/* + * Advanced Linux Sound Architecture - ALSA - Driver + * Copyright (c) 1994-2003 by Jaroslav Kysela , + * Abramo Bagnara + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _UAPI__SOUND_ASOUND_H +#define _UAPI__SOUND_ASOUND_H + +#if defined(__KERNEL__) || defined(__linux__) +#include +#else +#include +#endif + +#ifndef __KERNEL__ +#include +#endif + +/* + * protocol version + */ + +#define SNDRV_PROTOCOL_VERSION(major, minor, subminor) (((major)<<16)|((minor)<<8)|(subminor)) +#define SNDRV_PROTOCOL_MAJOR(version) (((version)>>16)&0xffff) +#define SNDRV_PROTOCOL_MINOR(version) (((version)>>8)&0xff) +#define SNDRV_PROTOCOL_MICRO(version) ((version)&0xff) +#define SNDRV_PROTOCOL_INCOMPATIBLE(kversion, uversion) \ + (SNDRV_PROTOCOL_MAJOR(kversion) != SNDRV_PROTOCOL_MAJOR(uversion) || \ + (SNDRV_PROTOCOL_MAJOR(kversion) == SNDRV_PROTOCOL_MAJOR(uversion) && \ + SNDRV_PROTOCOL_MINOR(kversion) != SNDRV_PROTOCOL_MINOR(uversion))) + +/**************************************************************************** + * * + * Digital audio interface * + * * + ****************************************************************************/ + +struct snd_aes_iec958 { + unsigned char status[24]; /* AES/IEC958 channel status bits */ + unsigned char subcode[147]; /* AES/IEC958 subcode bits */ + unsigned char pad; /* nothing */ + unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ +}; + +/**************************************************************************** + * * + * CEA-861 Audio InfoFrame. Used in HDMI and DisplayPort * + * * + ****************************************************************************/ + +struct snd_cea_861_aud_if { + unsigned char db1_ct_cc; /* coding type and channel count */ + unsigned char db2_sf_ss; /* sample frequency and size */ + unsigned char db3; /* not used, all zeros */ + unsigned char db4_ca; /* channel allocation code */ + unsigned char db5_dminh_lsv; /* downmix inhibit & level-shit values */ +}; + +/**************************************************************************** + * * + * Section for driver hardware dependent interface - /dev/snd/hw? * + * * + ****************************************************************************/ + +#define SNDRV_HWDEP_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 1) + +enum { + SNDRV_HWDEP_IFACE_OPL2 = 0, + SNDRV_HWDEP_IFACE_OPL3, + SNDRV_HWDEP_IFACE_OPL4, + SNDRV_HWDEP_IFACE_SB16CSP, /* Creative Signal Processor */ + SNDRV_HWDEP_IFACE_EMU10K1, /* FX8010 processor in EMU10K1 chip */ + SNDRV_HWDEP_IFACE_YSS225, /* Yamaha FX processor */ + SNDRV_HWDEP_IFACE_ICS2115, /* Wavetable synth */ + SNDRV_HWDEP_IFACE_SSCAPE, /* Ensoniq SoundScape ISA card (MC68EC000) */ + SNDRV_HWDEP_IFACE_VX, /* Digigram VX cards */ + SNDRV_HWDEP_IFACE_MIXART, /* Digigram miXart cards */ + SNDRV_HWDEP_IFACE_USX2Y, /* Tascam US122, US224 & US428 usb */ + SNDRV_HWDEP_IFACE_EMUX_WAVETABLE, /* EmuX wavetable */ + SNDRV_HWDEP_IFACE_BLUETOOTH, /* Bluetooth audio */ + SNDRV_HWDEP_IFACE_USX2Y_PCM, /* Tascam US122, US224 & US428 rawusb pcm */ + SNDRV_HWDEP_IFACE_PCXHR, /* Digigram PCXHR */ + SNDRV_HWDEP_IFACE_SB_RC, /* SB Extigy/Audigy2NX remote control */ + SNDRV_HWDEP_IFACE_HDA, /* HD-audio */ + SNDRV_HWDEP_IFACE_USB_STREAM, /* direct access to usb stream */ + SNDRV_HWDEP_IFACE_FW_DICE, /* TC DICE FireWire device */ + SNDRV_HWDEP_IFACE_FW_FIREWORKS, /* Echo Audio Fireworks based device */ + SNDRV_HWDEP_IFACE_FW_BEBOB, /* BridgeCo BeBoB based device */ + SNDRV_HWDEP_IFACE_FW_OXFW, /* Oxford OXFW970/971 based device */ + SNDRV_HWDEP_IFACE_FW_DIGI00X, /* Digidesign Digi 002/003 family */ + SNDRV_HWDEP_IFACE_FW_TASCAM, /* TASCAM FireWire series */ + SNDRV_HWDEP_IFACE_LINE6, /* Line6 USB processors */ + SNDRV_HWDEP_IFACE_FW_MOTU, /* MOTU FireWire series */ + SNDRV_HWDEP_IFACE_FW_FIREFACE, /* RME Fireface series */ + + /* Don't forget to change the following: */ + SNDRV_HWDEP_IFACE_LAST = SNDRV_HWDEP_IFACE_FW_FIREFACE +}; + +struct snd_hwdep_info { + unsigned int device; /* WR: device number */ + int card; /* R: card number */ + unsigned char id[64]; /* ID (user selectable) */ + unsigned char name[80]; /* hwdep name */ + int iface; /* hwdep interface */ + unsigned char reserved[64]; /* reserved for future */ +}; + +/* generic DSP loader */ +struct snd_hwdep_dsp_status { + unsigned int version; /* R: driver-specific version */ + unsigned char id[32]; /* R: driver-specific ID string */ + unsigned int num_dsps; /* R: number of DSP images to transfer */ + unsigned int dsp_loaded; /* R: bit flags indicating the loaded DSPs */ + unsigned int chip_ready; /* R: 1 = initialization finished */ + unsigned char reserved[16]; /* reserved for future use */ +}; + +struct snd_hwdep_dsp_image { + unsigned int index; /* W: DSP index */ + unsigned char name[64]; /* W: ID (e.g. file name) */ + unsigned char __user *image; /* W: binary image */ + size_t length; /* W: size of image in bytes */ + unsigned long driver_data; /* W: driver-specific data */ +}; + +#define SNDRV_HWDEP_IOCTL_PVERSION _IOR ('H', 0x00, int) +#define SNDRV_HWDEP_IOCTL_INFO _IOR ('H', 0x01, struct snd_hwdep_info) +#define SNDRV_HWDEP_IOCTL_DSP_STATUS _IOR('H', 0x02, struct snd_hwdep_dsp_status) +#define SNDRV_HWDEP_IOCTL_DSP_LOAD _IOW('H', 0x03, struct snd_hwdep_dsp_image) + +/***************************************************************************** + * * + * Digital Audio (PCM) interface - /dev/snd/pcm?? * + * * + *****************************************************************************/ + +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 14) + +typedef unsigned long snd_pcm_uframes_t; +typedef signed long snd_pcm_sframes_t; + +enum { + SNDRV_PCM_CLASS_GENERIC = 0, /* standard mono or stereo device */ + SNDRV_PCM_CLASS_MULTI, /* multichannel device */ + SNDRV_PCM_CLASS_MODEM, /* software modem class */ + SNDRV_PCM_CLASS_DIGITIZER, /* digitizer class */ + /* Don't forget to change the following: */ + SNDRV_PCM_CLASS_LAST = SNDRV_PCM_CLASS_DIGITIZER, +}; + +enum { + SNDRV_PCM_SUBCLASS_GENERIC_MIX = 0, /* mono or stereo subdevices are mixed together */ + SNDRV_PCM_SUBCLASS_MULTI_MIX, /* multichannel subdevices are mixed together */ + /* Don't forget to change the following: */ + SNDRV_PCM_SUBCLASS_LAST = SNDRV_PCM_SUBCLASS_MULTI_MIX, +}; + +enum { + SNDRV_PCM_STREAM_PLAYBACK = 0, + SNDRV_PCM_STREAM_CAPTURE, + SNDRV_PCM_STREAM_LAST = SNDRV_PCM_STREAM_CAPTURE, +}; + +typedef int __bitwise snd_pcm_access_t; +#define SNDRV_PCM_ACCESS_MMAP_INTERLEAVED ((__force snd_pcm_access_t) 0) /* interleaved mmap */ +#define SNDRV_PCM_ACCESS_MMAP_NONINTERLEAVED ((__force snd_pcm_access_t) 1) /* noninterleaved mmap */ +#define SNDRV_PCM_ACCESS_MMAP_COMPLEX ((__force snd_pcm_access_t) 2) /* complex mmap */ +#define SNDRV_PCM_ACCESS_RW_INTERLEAVED ((__force snd_pcm_access_t) 3) /* readi/writei */ +#define SNDRV_PCM_ACCESS_RW_NONINTERLEAVED ((__force snd_pcm_access_t) 4) /* readn/writen */ +#define SNDRV_PCM_ACCESS_LAST SNDRV_PCM_ACCESS_RW_NONINTERLEAVED + +typedef int __bitwise snd_pcm_format_t; +#define SNDRV_PCM_FORMAT_S8 ((__force snd_pcm_format_t) 0) +#define SNDRV_PCM_FORMAT_U8 ((__force snd_pcm_format_t) 1) +#define SNDRV_PCM_FORMAT_S16_LE ((__force snd_pcm_format_t) 2) +#define SNDRV_PCM_FORMAT_S16_BE ((__force snd_pcm_format_t) 3) +#define SNDRV_PCM_FORMAT_U16_LE ((__force snd_pcm_format_t) 4) +#define SNDRV_PCM_FORMAT_U16_BE ((__force snd_pcm_format_t) 5) +#define SNDRV_PCM_FORMAT_S24_LE ((__force snd_pcm_format_t) 6) /* low three bytes */ +#define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */ +#define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */ +#define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */ +#define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10) +#define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11) +#define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12) +#define SNDRV_PCM_FORMAT_U32_BE ((__force snd_pcm_format_t) 13) +#define SNDRV_PCM_FORMAT_FLOAT_LE ((__force snd_pcm_format_t) 14) /* 4-byte float, IEEE-754 32-bit, range -1.0 to 1.0 */ +#define SNDRV_PCM_FORMAT_FLOAT_BE ((__force snd_pcm_format_t) 15) /* 4-byte float, IEEE-754 32-bit, range -1.0 to 1.0 */ +#define SNDRV_PCM_FORMAT_FLOAT64_LE ((__force snd_pcm_format_t) 16) /* 8-byte float, IEEE-754 64-bit, range -1.0 to 1.0 */ +#define SNDRV_PCM_FORMAT_FLOAT64_BE ((__force snd_pcm_format_t) 17) /* 8-byte float, IEEE-754 64-bit, range -1.0 to 1.0 */ +#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE ((__force snd_pcm_format_t) 18) /* IEC-958 subframe, Little Endian */ +#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE ((__force snd_pcm_format_t) 19) /* IEC-958 subframe, Big Endian */ +#define SNDRV_PCM_FORMAT_MU_LAW ((__force snd_pcm_format_t) 20) +#define SNDRV_PCM_FORMAT_A_LAW ((__force snd_pcm_format_t) 21) +#define SNDRV_PCM_FORMAT_IMA_ADPCM ((__force snd_pcm_format_t) 22) +#define SNDRV_PCM_FORMAT_MPEG ((__force snd_pcm_format_t) 23) +#define SNDRV_PCM_FORMAT_GSM ((__force snd_pcm_format_t) 24) +#define SNDRV_PCM_FORMAT_SPECIAL ((__force snd_pcm_format_t) 31) +#define SNDRV_PCM_FORMAT_S24_3LE ((__force snd_pcm_format_t) 32) /* in three bytes */ +#define SNDRV_PCM_FORMAT_S24_3BE ((__force snd_pcm_format_t) 33) /* in three bytes */ +#define SNDRV_PCM_FORMAT_U24_3LE ((__force snd_pcm_format_t) 34) /* in three bytes */ +#define SNDRV_PCM_FORMAT_U24_3BE ((__force snd_pcm_format_t) 35) /* in three bytes */ +#define SNDRV_PCM_FORMAT_S20_3LE ((__force snd_pcm_format_t) 36) /* in three bytes */ +#define SNDRV_PCM_FORMAT_S20_3BE ((__force snd_pcm_format_t) 37) /* in three bytes */ +#define SNDRV_PCM_FORMAT_U20_3LE ((__force snd_pcm_format_t) 38) /* in three bytes */ +#define SNDRV_PCM_FORMAT_U20_3BE ((__force snd_pcm_format_t) 39) /* in three bytes */ +#define SNDRV_PCM_FORMAT_S18_3LE ((__force snd_pcm_format_t) 40) /* in three bytes */ +#define SNDRV_PCM_FORMAT_S18_3BE ((__force snd_pcm_format_t) 41) /* in three bytes */ +#define SNDRV_PCM_FORMAT_U18_3LE ((__force snd_pcm_format_t) 42) /* in three bytes */ +#define SNDRV_PCM_FORMAT_U18_3BE ((__force snd_pcm_format_t) 43) /* in three bytes */ +#define SNDRV_PCM_FORMAT_G723_24 ((__force snd_pcm_format_t) 44) /* 8 samples in 3 bytes */ +#define SNDRV_PCM_FORMAT_G723_24_1B ((__force snd_pcm_format_t) 45) /* 1 sample in 1 byte */ +#define SNDRV_PCM_FORMAT_G723_40 ((__force snd_pcm_format_t) 46) /* 8 Samples in 5 bytes */ +#define SNDRV_PCM_FORMAT_G723_40_1B ((__force snd_pcm_format_t) 47) /* 1 sample in 1 byte */ +#define SNDRV_PCM_FORMAT_DSD_U8 ((__force snd_pcm_format_t) 48) /* DSD, 1-byte samples DSD (x8) */ +#define SNDRV_PCM_FORMAT_DSD_U16_LE ((__force snd_pcm_format_t) 49) /* DSD, 2-byte samples DSD (x16), little endian */ +#define SNDRV_PCM_FORMAT_DSD_U32_LE ((__force snd_pcm_format_t) 50) /* DSD, 4-byte samples DSD (x32), little endian */ +#define SNDRV_PCM_FORMAT_DSD_U16_BE ((__force snd_pcm_format_t) 51) /* DSD, 2-byte samples DSD (x16), big endian */ +#define SNDRV_PCM_FORMAT_DSD_U32_BE ((__force snd_pcm_format_t) 52) /* DSD, 4-byte samples DSD (x32), big endian */ +#define SNDRV_PCM_FORMAT_LAST SNDRV_PCM_FORMAT_DSD_U32_BE + +#ifdef SNDRV_LITTLE_ENDIAN +#define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_LE +#define SNDRV_PCM_FORMAT_U16 SNDRV_PCM_FORMAT_U16_LE +#define SNDRV_PCM_FORMAT_S24 SNDRV_PCM_FORMAT_S24_LE +#define SNDRV_PCM_FORMAT_U24 SNDRV_PCM_FORMAT_U24_LE +#define SNDRV_PCM_FORMAT_S32 SNDRV_PCM_FORMAT_S32_LE +#define SNDRV_PCM_FORMAT_U32 SNDRV_PCM_FORMAT_U32_LE +#define SNDRV_PCM_FORMAT_FLOAT SNDRV_PCM_FORMAT_FLOAT_LE +#define SNDRV_PCM_FORMAT_FLOAT64 SNDRV_PCM_FORMAT_FLOAT64_LE +#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE +#endif +#ifdef SNDRV_BIG_ENDIAN +#define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_BE +#define SNDRV_PCM_FORMAT_U16 SNDRV_PCM_FORMAT_U16_BE +#define SNDRV_PCM_FORMAT_S24 SNDRV_PCM_FORMAT_S24_BE +#define SNDRV_PCM_FORMAT_U24 SNDRV_PCM_FORMAT_U24_BE +#define SNDRV_PCM_FORMAT_S32 SNDRV_PCM_FORMAT_S32_BE +#define SNDRV_PCM_FORMAT_U32 SNDRV_PCM_FORMAT_U32_BE +#define SNDRV_PCM_FORMAT_FLOAT SNDRV_PCM_FORMAT_FLOAT_BE +#define SNDRV_PCM_FORMAT_FLOAT64 SNDRV_PCM_FORMAT_FLOAT64_BE +#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE +#endif + +typedef int __bitwise snd_pcm_subformat_t; +#define SNDRV_PCM_SUBFORMAT_STD ((__force snd_pcm_subformat_t) 0) +#define SNDRV_PCM_SUBFORMAT_LAST SNDRV_PCM_SUBFORMAT_STD + +#define SNDRV_PCM_INFO_MMAP 0x00000001 /* hardware supports mmap */ +#define SNDRV_PCM_INFO_MMAP_VALID 0x00000002 /* period data are valid during transfer */ +#define SNDRV_PCM_INFO_DOUBLE 0x00000004 /* Double buffering needed for PCM start/stop */ +#define SNDRV_PCM_INFO_BATCH 0x00000010 /* double buffering */ +#define SNDRV_PCM_INFO_SYNC_APPLPTR 0x00000020 /* need the explicit sync of appl_ptr update */ +#define SNDRV_PCM_INFO_INTERLEAVED 0x00000100 /* channels are interleaved */ +#define SNDRV_PCM_INFO_NONINTERLEAVED 0x00000200 /* channels are not interleaved */ +#define SNDRV_PCM_INFO_COMPLEX 0x00000400 /* complex frame organization (mmap only) */ +#define SNDRV_PCM_INFO_BLOCK_TRANSFER 0x00010000 /* hardware transfer block of samples */ +#define SNDRV_PCM_INFO_OVERRANGE 0x00020000 /* hardware supports ADC (capture) overrange detection */ +#define SNDRV_PCM_INFO_RESUME 0x00040000 /* hardware supports stream resume after suspend */ +#define SNDRV_PCM_INFO_PAUSE 0x00080000 /* pause ioctl is supported */ +#define SNDRV_PCM_INFO_HALF_DUPLEX 0x00100000 /* only half duplex */ +#define SNDRV_PCM_INFO_JOINT_DUPLEX 0x00200000 /* playback and capture stream are somewhat correlated */ +#define SNDRV_PCM_INFO_SYNC_START 0x00400000 /* pcm support some kind of sync go */ +#define SNDRV_PCM_INFO_NO_PERIOD_WAKEUP 0x00800000 /* period wakeup can be disabled */ +#define SNDRV_PCM_INFO_HAS_WALL_CLOCK 0x01000000 /* (Deprecated)has audio wall clock for audio/system time sync */ +#define SNDRV_PCM_INFO_HAS_LINK_ATIME 0x01000000 /* report hardware link audio time, reset on startup */ +#define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME 0x02000000 /* report absolute hardware link audio time, not reset on startup */ +#define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ +#define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ + +#define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ +#define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ + + + +typedef int __bitwise snd_pcm_state_t; +#define SNDRV_PCM_STATE_OPEN ((__force snd_pcm_state_t) 0) /* stream is open */ +#define SNDRV_PCM_STATE_SETUP ((__force snd_pcm_state_t) 1) /* stream has a setup */ +#define SNDRV_PCM_STATE_PREPARED ((__force snd_pcm_state_t) 2) /* stream is ready to start */ +#define SNDRV_PCM_STATE_RUNNING ((__force snd_pcm_state_t) 3) /* stream is running */ +#define SNDRV_PCM_STATE_XRUN ((__force snd_pcm_state_t) 4) /* stream reached an xrun */ +#define SNDRV_PCM_STATE_DRAINING ((__force snd_pcm_state_t) 5) /* stream is draining */ +#define SNDRV_PCM_STATE_PAUSED ((__force snd_pcm_state_t) 6) /* stream is paused */ +#define SNDRV_PCM_STATE_SUSPENDED ((__force snd_pcm_state_t) 7) /* hardware is suspended */ +#define SNDRV_PCM_STATE_DISCONNECTED ((__force snd_pcm_state_t) 8) /* hardware is disconnected */ +#define SNDRV_PCM_STATE_LAST SNDRV_PCM_STATE_DISCONNECTED + +enum { + SNDRV_PCM_MMAP_OFFSET_DATA = 0x00000000, + SNDRV_PCM_MMAP_OFFSET_STATUS = 0x80000000, + SNDRV_PCM_MMAP_OFFSET_CONTROL = 0x81000000, +}; + +union snd_pcm_sync_id { + unsigned char id[16]; + unsigned short id16[8]; + unsigned int id32[4]; +}; + +struct snd_pcm_info { + unsigned int device; /* RO/WR (control): device number */ + unsigned int subdevice; /* RO/WR (control): subdevice number */ + int stream; /* RO/WR (control): stream direction */ + int card; /* R: card number */ + unsigned char id[64]; /* ID (user selectable) */ + unsigned char name[80]; /* name of this device */ + unsigned char subname[32]; /* subdevice name */ + int dev_class; /* SNDRV_PCM_CLASS_* */ + int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */ + unsigned int subdevices_count; + unsigned int subdevices_avail; + union snd_pcm_sync_id sync; /* hardware synchronization ID */ + unsigned char reserved[64]; /* reserved for future... */ +}; + +typedef int snd_pcm_hw_param_t; +#define SNDRV_PCM_HW_PARAM_ACCESS 0 /* Access type */ +#define SNDRV_PCM_HW_PARAM_FORMAT 1 /* Format */ +#define SNDRV_PCM_HW_PARAM_SUBFORMAT 2 /* Subformat */ +#define SNDRV_PCM_HW_PARAM_FIRST_MASK SNDRV_PCM_HW_PARAM_ACCESS +#define SNDRV_PCM_HW_PARAM_LAST_MASK SNDRV_PCM_HW_PARAM_SUBFORMAT + +#define SNDRV_PCM_HW_PARAM_SAMPLE_BITS 8 /* Bits per sample */ +#define SNDRV_PCM_HW_PARAM_FRAME_BITS 9 /* Bits per frame */ +#define SNDRV_PCM_HW_PARAM_CHANNELS 10 /* Channels */ +#define SNDRV_PCM_HW_PARAM_RATE 11 /* Approx rate */ +#define SNDRV_PCM_HW_PARAM_PERIOD_TIME 12 /* Approx distance between + * interrupts in us + */ +#define SNDRV_PCM_HW_PARAM_PERIOD_SIZE 13 /* Approx frames between + * interrupts + */ +#define SNDRV_PCM_HW_PARAM_PERIOD_BYTES 14 /* Approx bytes between + * interrupts + */ +#define SNDRV_PCM_HW_PARAM_PERIODS 15 /* Approx interrupts per + * buffer + */ +#define SNDRV_PCM_HW_PARAM_BUFFER_TIME 16 /* Approx duration of buffer + * in us + */ +#define SNDRV_PCM_HW_PARAM_BUFFER_SIZE 17 /* Size of buffer in frames */ +#define SNDRV_PCM_HW_PARAM_BUFFER_BYTES 18 /* Size of buffer in bytes */ +#define SNDRV_PCM_HW_PARAM_TICK_TIME 19 /* Approx tick duration in us */ +#define SNDRV_PCM_HW_PARAM_FIRST_INTERVAL SNDRV_PCM_HW_PARAM_SAMPLE_BITS +#define SNDRV_PCM_HW_PARAM_LAST_INTERVAL SNDRV_PCM_HW_PARAM_TICK_TIME + +#define SNDRV_PCM_HW_PARAMS_NORESAMPLE (1<<0) /* avoid rate resampling */ +#define SNDRV_PCM_HW_PARAMS_EXPORT_BUFFER (1<<1) /* export buffer */ +#define SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP (1<<2) /* disable period wakeups */ + +struct snd_interval { + unsigned int min, max; + unsigned int openmin:1, + openmax:1, + integer:1, + empty:1; +}; + +#define SNDRV_MASK_MAX 256 + +struct snd_mask { + __u32 bits[(SNDRV_MASK_MAX+31)/32]; +}; + +struct snd_pcm_hw_params { + unsigned int flags; + struct snd_mask masks[SNDRV_PCM_HW_PARAM_LAST_MASK - + SNDRV_PCM_HW_PARAM_FIRST_MASK + 1]; + struct snd_mask mres[5]; /* reserved masks */ + struct snd_interval intervals[SNDRV_PCM_HW_PARAM_LAST_INTERVAL - + SNDRV_PCM_HW_PARAM_FIRST_INTERVAL + 1]; + struct snd_interval ires[9]; /* reserved intervals */ + unsigned int rmask; /* W: requested masks */ + unsigned int cmask; /* R: changed masks */ + unsigned int info; /* R: Info flags for returned setup */ + unsigned int msbits; /* R: used most significant bits */ + unsigned int rate_num; /* R: rate numerator */ + unsigned int rate_den; /* R: rate denominator */ + snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ + unsigned char reserved[64]; /* reserved for future */ +}; + +enum { + SNDRV_PCM_TSTAMP_NONE = 0, + SNDRV_PCM_TSTAMP_ENABLE, + SNDRV_PCM_TSTAMP_LAST = SNDRV_PCM_TSTAMP_ENABLE, +}; + +struct snd_pcm_sw_params { + int tstamp_mode; /* timestamp mode */ + unsigned int period_step; + unsigned int sleep_min; /* min ticks to sleep */ + snd_pcm_uframes_t avail_min; /* min avail frames for wakeup */ + snd_pcm_uframes_t xfer_align; /* obsolete: xfer size need to be a multiple */ + snd_pcm_uframes_t start_threshold; /* min hw_avail frames for automatic start */ + snd_pcm_uframes_t stop_threshold; /* min avail frames for automatic stop */ + snd_pcm_uframes_t silence_threshold; /* min distance from noise for silence filling */ + snd_pcm_uframes_t silence_size; /* silence block size */ + snd_pcm_uframes_t boundary; /* pointers wrap point */ + unsigned int proto; /* protocol version */ + unsigned int tstamp_type; /* timestamp type (req. proto >= 2.0.12) */ + unsigned char reserved[56]; /* reserved for future */ +}; + +struct snd_pcm_channel_info { + unsigned int channel; + __kernel_off_t offset; /* mmap offset */ + unsigned int first; /* offset to first sample in bits */ + unsigned int step; /* samples distance in bits */ +}; + +enum { + /* + * first definition for backwards compatibility only, + * maps to wallclock/link time for HDAudio playback and DEFAULT/DMA time for everything else + */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_COMPAT = 0, + + /* timestamp definitions */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_DEFAULT = 1, /* DMA time, reported as per hw_ptr */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK = 2, /* link time reported by sample or wallclock counter, reset on startup */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_ABSOLUTE = 3, /* link time reported by sample or wallclock counter, not reset on startup */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_ESTIMATED = 4, /* link time estimated indirectly */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_SYNCHRONIZED = 5, /* link time synchronized with system time */ + SNDRV_PCM_AUDIO_TSTAMP_TYPE_LAST = SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_SYNCHRONIZED +}; + +struct snd_pcm_status { + snd_pcm_state_t state; /* stream state */ + struct timespec trigger_tstamp; /* time when stream was started/stopped/paused */ + struct timespec tstamp; /* reference timestamp */ + snd_pcm_uframes_t appl_ptr; /* appl ptr */ + snd_pcm_uframes_t hw_ptr; /* hw ptr */ + snd_pcm_sframes_t delay; /* current delay in frames */ + snd_pcm_uframes_t avail; /* number of frames available */ + snd_pcm_uframes_t avail_max; /* max frames available on hw since last status */ + snd_pcm_uframes_t overrange; /* count of ADC (capture) overrange detections from last status */ + snd_pcm_state_t suspended_state; /* suspended stream state */ + __u32 audio_tstamp_data; /* needed for 64-bit alignment, used for configs/report to/from userspace */ + struct timespec audio_tstamp; /* sample counter, wall clock, PHC or on-demand sync'ed */ + struct timespec driver_tstamp; /* useful in case reference system tstamp is reported with delay */ + __u32 audio_tstamp_accuracy; /* in ns units, only valid if indicated in audio_tstamp_data */ + unsigned char reserved[52-2*sizeof(struct timespec)]; /* must be filled with zero */ +}; + +struct snd_pcm_mmap_status { + snd_pcm_state_t state; /* RO: state - SNDRV_PCM_STATE_XXXX */ + int pad1; /* Needed for 64 bit alignment */ + snd_pcm_uframes_t hw_ptr; /* RO: hw ptr (0...boundary-1) */ + struct timespec tstamp; /* Timestamp */ + snd_pcm_state_t suspended_state; /* RO: suspended stream state */ + struct timespec audio_tstamp; /* from sample counter or wall clock */ +}; + +struct snd_pcm_mmap_control { + snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */ + snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */ +}; + +#define SNDRV_PCM_SYNC_PTR_HWSYNC (1<<0) /* execute hwsync */ +#define SNDRV_PCM_SYNC_PTR_APPL (1<<1) /* get appl_ptr from driver (r/w op) */ +#define SNDRV_PCM_SYNC_PTR_AVAIL_MIN (1<<2) /* get avail_min from driver */ + +struct snd_pcm_sync_ptr { + unsigned int flags; + union { + struct snd_pcm_mmap_status status; + unsigned char reserved[64]; + } s; + union { + struct snd_pcm_mmap_control control; + unsigned char reserved[64]; + } c; +}; + +struct snd_xferi { + snd_pcm_sframes_t result; + void __user *buf; + snd_pcm_uframes_t frames; +}; + +struct snd_xfern { + snd_pcm_sframes_t result; + void __user * __user *bufs; + snd_pcm_uframes_t frames; +}; + +enum { + SNDRV_PCM_TSTAMP_TYPE_GETTIMEOFDAY = 0, /* gettimeofday equivalent */ + SNDRV_PCM_TSTAMP_TYPE_MONOTONIC, /* posix_clock_monotonic equivalent */ + SNDRV_PCM_TSTAMP_TYPE_MONOTONIC_RAW, /* monotonic_raw (no NTP) */ + SNDRV_PCM_TSTAMP_TYPE_LAST = SNDRV_PCM_TSTAMP_TYPE_MONOTONIC_RAW, +}; + +/* channel positions */ +enum { + SNDRV_CHMAP_UNKNOWN = 0, + SNDRV_CHMAP_NA, /* N/A, silent */ + SNDRV_CHMAP_MONO, /* mono stream */ + /* this follows the alsa-lib mixer channel value + 3 */ + SNDRV_CHMAP_FL, /* front left */ + SNDRV_CHMAP_FR, /* front right */ + SNDRV_CHMAP_RL, /* rear left */ + SNDRV_CHMAP_RR, /* rear right */ + SNDRV_CHMAP_FC, /* front center */ + SNDRV_CHMAP_LFE, /* LFE */ + SNDRV_CHMAP_SL, /* side left */ + SNDRV_CHMAP_SR, /* side right */ + SNDRV_CHMAP_RC, /* rear center */ + /* new definitions */ + SNDRV_CHMAP_FLC, /* front left center */ + SNDRV_CHMAP_FRC, /* front right center */ + SNDRV_CHMAP_RLC, /* rear left center */ + SNDRV_CHMAP_RRC, /* rear right center */ + SNDRV_CHMAP_FLW, /* front left wide */ + SNDRV_CHMAP_FRW, /* front right wide */ + SNDRV_CHMAP_FLH, /* front left high */ + SNDRV_CHMAP_FCH, /* front center high */ + SNDRV_CHMAP_FRH, /* front right high */ + SNDRV_CHMAP_TC, /* top center */ + SNDRV_CHMAP_TFL, /* top front left */ + SNDRV_CHMAP_TFR, /* top front right */ + SNDRV_CHMAP_TFC, /* top front center */ + SNDRV_CHMAP_TRL, /* top rear left */ + SNDRV_CHMAP_TRR, /* top rear right */ + SNDRV_CHMAP_TRC, /* top rear center */ + /* new definitions for UAC2 */ + SNDRV_CHMAP_TFLC, /* top front left center */ + SNDRV_CHMAP_TFRC, /* top front right center */ + SNDRV_CHMAP_TSL, /* top side left */ + SNDRV_CHMAP_TSR, /* top side right */ + SNDRV_CHMAP_LLFE, /* left LFE */ + SNDRV_CHMAP_RLFE, /* right LFE */ + SNDRV_CHMAP_BC, /* bottom center */ + SNDRV_CHMAP_BLC, /* bottom left center */ + SNDRV_CHMAP_BRC, /* bottom right center */ + SNDRV_CHMAP_LAST = SNDRV_CHMAP_BRC, +}; + +#define SNDRV_CHMAP_POSITION_MASK 0xffff +#define SNDRV_CHMAP_PHASE_INVERSE (0x01 << 16) +#define SNDRV_CHMAP_DRIVER_SPEC (0x02 << 16) + +#define SNDRV_PCM_IOCTL_PVERSION _IOR('A', 0x00, int) +#define SNDRV_PCM_IOCTL_INFO _IOR('A', 0x01, struct snd_pcm_info) +#define SNDRV_PCM_IOCTL_TSTAMP _IOW('A', 0x02, int) +#define SNDRV_PCM_IOCTL_TTSTAMP _IOW('A', 0x03, int) +#define SNDRV_PCM_IOCTL_USER_PVERSION _IOW('A', 0x04, int) +#define SNDRV_PCM_IOCTL_HW_REFINE _IOWR('A', 0x10, struct snd_pcm_hw_params) +#define SNDRV_PCM_IOCTL_HW_PARAMS _IOWR('A', 0x11, struct snd_pcm_hw_params) +#define SNDRV_PCM_IOCTL_HW_FREE _IO('A', 0x12) +#define SNDRV_PCM_IOCTL_SW_PARAMS _IOWR('A', 0x13, struct snd_pcm_sw_params) +#define SNDRV_PCM_IOCTL_STATUS _IOR('A', 0x20, struct snd_pcm_status) +#define SNDRV_PCM_IOCTL_DELAY _IOR('A', 0x21, snd_pcm_sframes_t) +#define SNDRV_PCM_IOCTL_HWSYNC _IO('A', 0x22) +#define SNDRV_PCM_IOCTL_SYNC_PTR _IOWR('A', 0x23, struct snd_pcm_sync_ptr) +#define SNDRV_PCM_IOCTL_STATUS_EXT _IOWR('A', 0x24, struct snd_pcm_status) +#define SNDRV_PCM_IOCTL_CHANNEL_INFO _IOR('A', 0x32, struct snd_pcm_channel_info) +#define SNDRV_PCM_IOCTL_PREPARE _IO('A', 0x40) +#define SNDRV_PCM_IOCTL_RESET _IO('A', 0x41) +#define SNDRV_PCM_IOCTL_START _IO('A', 0x42) +#define SNDRV_PCM_IOCTL_DROP _IO('A', 0x43) +#define SNDRV_PCM_IOCTL_DRAIN _IO('A', 0x44) +#define SNDRV_PCM_IOCTL_PAUSE _IOW('A', 0x45, int) +#define SNDRV_PCM_IOCTL_REWIND _IOW('A', 0x46, snd_pcm_uframes_t) +#define SNDRV_PCM_IOCTL_RESUME _IO('A', 0x47) +#define SNDRV_PCM_IOCTL_XRUN _IO('A', 0x48) +#define SNDRV_PCM_IOCTL_FORWARD _IOW('A', 0x49, snd_pcm_uframes_t) +#define SNDRV_PCM_IOCTL_WRITEI_FRAMES _IOW('A', 0x50, struct snd_xferi) +#define SNDRV_PCM_IOCTL_READI_FRAMES _IOR('A', 0x51, struct snd_xferi) +#define SNDRV_PCM_IOCTL_WRITEN_FRAMES _IOW('A', 0x52, struct snd_xfern) +#define SNDRV_PCM_IOCTL_READN_FRAMES _IOR('A', 0x53, struct snd_xfern) +#define SNDRV_PCM_IOCTL_LINK _IOW('A', 0x60, int) +#define SNDRV_PCM_IOCTL_UNLINK _IO('A', 0x61) + +/***************************************************************************** + * * + * MIDI v1.0 interface * + * * + *****************************************************************************/ + +/* + * Raw MIDI section - /dev/snd/midi?? + */ + +#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 0) + +enum { + SNDRV_RAWMIDI_STREAM_OUTPUT = 0, + SNDRV_RAWMIDI_STREAM_INPUT, + SNDRV_RAWMIDI_STREAM_LAST = SNDRV_RAWMIDI_STREAM_INPUT, +}; + +#define SNDRV_RAWMIDI_INFO_OUTPUT 0x00000001 +#define SNDRV_RAWMIDI_INFO_INPUT 0x00000002 +#define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004 + +struct snd_rawmidi_info { + unsigned int device; /* RO/WR (control): device number */ + unsigned int subdevice; /* RO/WR (control): subdevice number */ + int stream; /* WR: stream */ + int card; /* R: card number */ + unsigned int flags; /* SNDRV_RAWMIDI_INFO_XXXX */ + unsigned char id[64]; /* ID (user selectable) */ + unsigned char name[80]; /* name of device */ + unsigned char subname[32]; /* name of active or selected subdevice */ + unsigned int subdevices_count; + unsigned int subdevices_avail; + unsigned char reserved[64]; /* reserved for future use */ +}; + +struct snd_rawmidi_params { + int stream; + size_t buffer_size; /* queue size in bytes */ + size_t avail_min; /* minimum avail bytes for wakeup */ + unsigned int no_active_sensing: 1; /* do not send active sensing byte in close() */ + unsigned char reserved[16]; /* reserved for future use */ +}; + +struct snd_rawmidi_status { + int stream; + struct timespec tstamp; /* Timestamp */ + size_t avail; /* available bytes */ + size_t xruns; /* count of overruns since last status (in bytes) */ + unsigned char reserved[16]; /* reserved for future use */ +}; + +#define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int) +#define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info) +#define SNDRV_RAWMIDI_IOCTL_PARAMS _IOWR('W', 0x10, struct snd_rawmidi_params) +#define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status) +#define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int) +#define SNDRV_RAWMIDI_IOCTL_DRAIN _IOW('W', 0x31, int) + +/* + * Timer section - /dev/snd/timer + */ + +#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 6) + +enum { + SNDRV_TIMER_CLASS_NONE = -1, + SNDRV_TIMER_CLASS_SLAVE = 0, + SNDRV_TIMER_CLASS_GLOBAL, + SNDRV_TIMER_CLASS_CARD, + SNDRV_TIMER_CLASS_PCM, + SNDRV_TIMER_CLASS_LAST = SNDRV_TIMER_CLASS_PCM, +}; + +/* slave timer classes */ +enum { + SNDRV_TIMER_SCLASS_NONE = 0, + SNDRV_TIMER_SCLASS_APPLICATION, + SNDRV_TIMER_SCLASS_SEQUENCER, /* alias */ + SNDRV_TIMER_SCLASS_OSS_SEQUENCER, /* alias */ + SNDRV_TIMER_SCLASS_LAST = SNDRV_TIMER_SCLASS_OSS_SEQUENCER, +}; + +/* global timers (device member) */ +#define SNDRV_TIMER_GLOBAL_SYSTEM 0 +#define SNDRV_TIMER_GLOBAL_RTC 1 /* unused */ +#define SNDRV_TIMER_GLOBAL_HPET 2 +#define SNDRV_TIMER_GLOBAL_HRTIMER 3 + +/* info flags */ +#define SNDRV_TIMER_FLG_SLAVE (1<<0) /* cannot be controlled */ + +struct snd_timer_id { + int dev_class; + int dev_sclass; + int card; + int device; + int subdevice; +}; + +struct snd_timer_ginfo { + struct snd_timer_id tid; /* requested timer ID */ + unsigned int flags; /* timer flags - SNDRV_TIMER_FLG_* */ + int card; /* card number */ + unsigned char id[64]; /* timer identification */ + unsigned char name[80]; /* timer name */ + unsigned long reserved0; /* reserved for future use */ + unsigned long resolution; /* average period resolution in ns */ + unsigned long resolution_min; /* minimal period resolution in ns */ + unsigned long resolution_max; /* maximal period resolution in ns */ + unsigned int clients; /* active timer clients */ + unsigned char reserved[32]; +}; + +struct snd_timer_gparams { + struct snd_timer_id tid; /* requested timer ID */ + unsigned long period_num; /* requested precise period duration (in seconds) - numerator */ + unsigned long period_den; /* requested precise period duration (in seconds) - denominator */ + unsigned char reserved[32]; +}; + +struct snd_timer_gstatus { + struct snd_timer_id tid; /* requested timer ID */ + unsigned long resolution; /* current period resolution in ns */ + unsigned long resolution_num; /* precise current period resolution (in seconds) - numerator */ + unsigned long resolution_den; /* precise current period resolution (in seconds) - denominator */ + unsigned char reserved[32]; +}; + +struct snd_timer_select { + struct snd_timer_id id; /* bind to timer ID */ + unsigned char reserved[32]; /* reserved */ +}; + +struct snd_timer_info { + unsigned int flags; /* timer flags - SNDRV_TIMER_FLG_* */ + int card; /* card number */ + unsigned char id[64]; /* timer identificator */ + unsigned char name[80]; /* timer name */ + unsigned long reserved0; /* reserved for future use */ + unsigned long resolution; /* average period resolution in ns */ + unsigned char reserved[64]; /* reserved */ +}; + +#define SNDRV_TIMER_PSFLG_AUTO (1<<0) /* auto start, otherwise one-shot */ +#define SNDRV_TIMER_PSFLG_EXCLUSIVE (1<<1) /* exclusive use, precise start/stop/pause/continue */ +#define SNDRV_TIMER_PSFLG_EARLY_EVENT (1<<2) /* write early event to the poll queue */ + +struct snd_timer_params { + unsigned int flags; /* flags - SNDRV_MIXER_PSFLG_* */ + unsigned int ticks; /* requested resolution in ticks */ + unsigned int queue_size; /* total size of queue (32-1024) */ + unsigned int reserved0; /* reserved, was: failure locations */ + unsigned int filter; /* event filter (bitmask of SNDRV_TIMER_EVENT_*) */ + unsigned char reserved[60]; /* reserved */ +}; + +struct snd_timer_status { + struct timespec tstamp; /* Timestamp - last update */ + unsigned int resolution; /* current period resolution in ns */ + unsigned int lost; /* counter of master tick lost */ + unsigned int overrun; /* count of read queue overruns */ + unsigned int queue; /* used queue size */ + unsigned char reserved[64]; /* reserved */ +}; + +#define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int) +#define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id) +#define SNDRV_TIMER_IOCTL_TREAD _IOW('T', 0x02, int) +#define SNDRV_TIMER_IOCTL_GINFO _IOWR('T', 0x03, struct snd_timer_ginfo) +#define SNDRV_TIMER_IOCTL_GPARAMS _IOW('T', 0x04, struct snd_timer_gparams) +#define SNDRV_TIMER_IOCTL_GSTATUS _IOWR('T', 0x05, struct snd_timer_gstatus) +#define SNDRV_TIMER_IOCTL_SELECT _IOW('T', 0x10, struct snd_timer_select) +#define SNDRV_TIMER_IOCTL_INFO _IOR('T', 0x11, struct snd_timer_info) +#define SNDRV_TIMER_IOCTL_PARAMS _IOW('T', 0x12, struct snd_timer_params) +#define SNDRV_TIMER_IOCTL_STATUS _IOR('T', 0x14, struct snd_timer_status) +/* The following four ioctls are changed since 1.0.9 due to confliction */ +#define SNDRV_TIMER_IOCTL_START _IO('T', 0xa0) +#define SNDRV_TIMER_IOCTL_STOP _IO('T', 0xa1) +#define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2) +#define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3) + +struct snd_timer_read { + unsigned int resolution; + unsigned int ticks; +}; + +enum { + SNDRV_TIMER_EVENT_RESOLUTION = 0, /* val = resolution in ns */ + SNDRV_TIMER_EVENT_TICK, /* val = ticks */ + SNDRV_TIMER_EVENT_START, /* val = resolution in ns */ + SNDRV_TIMER_EVENT_STOP, /* val = 0 */ + SNDRV_TIMER_EVENT_CONTINUE, /* val = resolution in ns */ + SNDRV_TIMER_EVENT_PAUSE, /* val = 0 */ + SNDRV_TIMER_EVENT_EARLY, /* val = 0, early event */ + SNDRV_TIMER_EVENT_SUSPEND, /* val = 0 */ + SNDRV_TIMER_EVENT_RESUME, /* val = resolution in ns */ + /* master timer events for slave timer instances */ + SNDRV_TIMER_EVENT_MSTART = SNDRV_TIMER_EVENT_START + 10, + SNDRV_TIMER_EVENT_MSTOP = SNDRV_TIMER_EVENT_STOP + 10, + SNDRV_TIMER_EVENT_MCONTINUE = SNDRV_TIMER_EVENT_CONTINUE + 10, + SNDRV_TIMER_EVENT_MPAUSE = SNDRV_TIMER_EVENT_PAUSE + 10, + SNDRV_TIMER_EVENT_MSUSPEND = SNDRV_TIMER_EVENT_SUSPEND + 10, + SNDRV_TIMER_EVENT_MRESUME = SNDRV_TIMER_EVENT_RESUME + 10, +}; + +struct snd_timer_tread { + int event; + struct timespec tstamp; + unsigned int val; +}; + +/**************************************************************************** + * * + * Section for driver control interface - /dev/snd/control? * + * * + ****************************************************************************/ + +#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7) + +struct snd_ctl_card_info { + int card; /* card number */ + int pad; /* reserved for future (was type) */ + unsigned char id[16]; /* ID of card (user selectable) */ + unsigned char driver[16]; /* Driver name */ + unsigned char name[32]; /* Short name of soundcard */ + unsigned char longname[80]; /* name + info text about soundcard */ + unsigned char reserved_[16]; /* reserved for future (was ID of mixer) */ + unsigned char mixername[80]; /* visual mixer identification */ + unsigned char components[128]; /* card components / fine identification, delimited with one space (AC97 etc..) */ +}; + +typedef int __bitwise snd_ctl_elem_type_t; +#define SNDRV_CTL_ELEM_TYPE_NONE ((__force snd_ctl_elem_type_t) 0) /* invalid */ +#define SNDRV_CTL_ELEM_TYPE_BOOLEAN ((__force snd_ctl_elem_type_t) 1) /* boolean type */ +#define SNDRV_CTL_ELEM_TYPE_INTEGER ((__force snd_ctl_elem_type_t) 2) /* integer type */ +#define SNDRV_CTL_ELEM_TYPE_ENUMERATED ((__force snd_ctl_elem_type_t) 3) /* enumerated type */ +#define SNDRV_CTL_ELEM_TYPE_BYTES ((__force snd_ctl_elem_type_t) 4) /* byte array */ +#define SNDRV_CTL_ELEM_TYPE_IEC958 ((__force snd_ctl_elem_type_t) 5) /* IEC958 (S/PDIF) setup */ +#define SNDRV_CTL_ELEM_TYPE_INTEGER64 ((__force snd_ctl_elem_type_t) 6) /* 64-bit integer type */ +#define SNDRV_CTL_ELEM_TYPE_LAST SNDRV_CTL_ELEM_TYPE_INTEGER64 + +typedef int __bitwise snd_ctl_elem_iface_t; +#define SNDRV_CTL_ELEM_IFACE_CARD ((__force snd_ctl_elem_iface_t) 0) /* global control */ +#define SNDRV_CTL_ELEM_IFACE_HWDEP ((__force snd_ctl_elem_iface_t) 1) /* hardware dependent device */ +#define SNDRV_CTL_ELEM_IFACE_MIXER ((__force snd_ctl_elem_iface_t) 2) /* virtual mixer device */ +#define SNDRV_CTL_ELEM_IFACE_PCM ((__force snd_ctl_elem_iface_t) 3) /* PCM device */ +#define SNDRV_CTL_ELEM_IFACE_RAWMIDI ((__force snd_ctl_elem_iface_t) 4) /* RawMidi device */ +#define SNDRV_CTL_ELEM_IFACE_TIMER ((__force snd_ctl_elem_iface_t) 5) /* timer device */ +#define SNDRV_CTL_ELEM_IFACE_SEQUENCER ((__force snd_ctl_elem_iface_t) 6) /* sequencer client */ +#define SNDRV_CTL_ELEM_IFACE_LAST SNDRV_CTL_ELEM_IFACE_SEQUENCER + +#define SNDRV_CTL_ELEM_ACCESS_READ (1<<0) +#define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1) +#define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE) +#define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */ +#define SNDRV_CTL_ELEM_ACCESS_TIMESTAMP (1<<3) /* when was control changed */ +#define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */ +#define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */ +#define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) +#define SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND (1<<6) /* TLV command is possible */ +#define SNDRV_CTL_ELEM_ACCESS_INACTIVE (1<<8) /* control does actually nothing, but may be updated */ +#define SNDRV_CTL_ELEM_ACCESS_LOCK (1<<9) /* write lock */ +#define SNDRV_CTL_ELEM_ACCESS_OWNER (1<<10) /* write lock owner */ +#define SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK (1<<28) /* kernel use a TLV callback */ +#define SNDRV_CTL_ELEM_ACCESS_USER (1<<29) /* user space element */ +/* bits 30 and 31 are obsoleted (for indirect access) */ + +/* for further details see the ACPI and PCI power management specification */ +#define SNDRV_CTL_POWER_D0 0x0000 /* full On */ +#define SNDRV_CTL_POWER_D1 0x0100 /* partial On */ +#define SNDRV_CTL_POWER_D2 0x0200 /* partial On */ +#define SNDRV_CTL_POWER_D3 0x0300 /* Off */ +#define SNDRV_CTL_POWER_D3hot (SNDRV_CTL_POWER_D3|0x0000) /* Off, with power */ +#define SNDRV_CTL_POWER_D3cold (SNDRV_CTL_POWER_D3|0x0001) /* Off, without power */ + +#define SNDRV_CTL_ELEM_ID_NAME_MAXLEN 44 + +struct snd_ctl_elem_id { + unsigned int numid; /* numeric identifier, zero = invalid */ + snd_ctl_elem_iface_t iface; /* interface identifier */ + unsigned int device; /* device/client number */ + unsigned int subdevice; /* subdevice (substream) number */ + unsigned char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; /* ASCII name of item */ + unsigned int index; /* index of item */ +}; + +struct snd_ctl_elem_list { + unsigned int offset; /* W: first element ID to get */ + unsigned int space; /* W: count of element IDs to get */ + unsigned int used; /* R: count of element IDs set */ + unsigned int count; /* R: count of all elements */ + struct snd_ctl_elem_id __user *pids; /* R: IDs */ + unsigned char reserved[50]; +}; + +struct snd_ctl_elem_info { + struct snd_ctl_elem_id id; /* W: element ID */ + snd_ctl_elem_type_t type; /* R: value type - SNDRV_CTL_ELEM_TYPE_* */ + unsigned int access; /* R: value access (bitmask) - SNDRV_CTL_ELEM_ACCESS_* */ + unsigned int count; /* count of values */ + __kernel_pid_t owner; /* owner's PID of this control */ + union { + struct { + long min; /* R: minimum value */ + long max; /* R: maximum value */ + long step; /* R: step (0 variable) */ + } integer; + struct { + long long min; /* R: minimum value */ + long long max; /* R: maximum value */ + long long step; /* R: step (0 variable) */ + } integer64; + struct { + unsigned int items; /* R: number of items */ + unsigned int item; /* W: item number */ + char name[64]; /* R: value name */ + __u64 names_ptr; /* W: names list (ELEM_ADD only) */ + unsigned int names_length; + } enumerated; + unsigned char reserved[128]; + } value; + union { + unsigned short d[4]; /* dimensions */ + unsigned short *d_ptr; /* indirect - obsoleted */ + } dimen; + unsigned char reserved[64-4*sizeof(unsigned short)]; +}; + +struct snd_ctl_elem_value { + struct snd_ctl_elem_id id; /* W: element ID */ + unsigned int indirect: 1; /* W: indirect access - obsoleted */ + union { + union { + long value[128]; + long *value_ptr; /* obsoleted */ + } integer; + union { + long long value[64]; + long long *value_ptr; /* obsoleted */ + } integer64; + union { + unsigned int item[128]; + unsigned int *item_ptr; /* obsoleted */ + } enumerated; + union { + unsigned char data[512]; + unsigned char *data_ptr; /* obsoleted */ + } bytes; + struct snd_aes_iec958 iec958; + } value; /* RO */ + struct timespec tstamp; + unsigned char reserved[128-sizeof(struct timespec)]; +}; + +struct snd_ctl_tlv { + unsigned int numid; /* control element numeric identification */ + unsigned int length; /* in bytes aligned to 4 */ + unsigned int tlv[0]; /* first TLV */ +}; + +#define SNDRV_CTL_IOCTL_PVERSION _IOR('U', 0x00, int) +#define SNDRV_CTL_IOCTL_CARD_INFO _IOR('U', 0x01, struct snd_ctl_card_info) +#define SNDRV_CTL_IOCTL_ELEM_LIST _IOWR('U', 0x10, struct snd_ctl_elem_list) +#define SNDRV_CTL_IOCTL_ELEM_INFO _IOWR('U', 0x11, struct snd_ctl_elem_info) +#define SNDRV_CTL_IOCTL_ELEM_READ _IOWR('U', 0x12, struct snd_ctl_elem_value) +#define SNDRV_CTL_IOCTL_ELEM_WRITE _IOWR('U', 0x13, struct snd_ctl_elem_value) +#define SNDRV_CTL_IOCTL_ELEM_LOCK _IOW('U', 0x14, struct snd_ctl_elem_id) +#define SNDRV_CTL_IOCTL_ELEM_UNLOCK _IOW('U', 0x15, struct snd_ctl_elem_id) +#define SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS _IOWR('U', 0x16, int) +#define SNDRV_CTL_IOCTL_ELEM_ADD _IOWR('U', 0x17, struct snd_ctl_elem_info) +#define SNDRV_CTL_IOCTL_ELEM_REPLACE _IOWR('U', 0x18, struct snd_ctl_elem_info) +#define SNDRV_CTL_IOCTL_ELEM_REMOVE _IOWR('U', 0x19, struct snd_ctl_elem_id) +#define SNDRV_CTL_IOCTL_TLV_READ _IOWR('U', 0x1a, struct snd_ctl_tlv) +#define SNDRV_CTL_IOCTL_TLV_WRITE _IOWR('U', 0x1b, struct snd_ctl_tlv) +#define SNDRV_CTL_IOCTL_TLV_COMMAND _IOWR('U', 0x1c, struct snd_ctl_tlv) +#define SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE _IOWR('U', 0x20, int) +#define SNDRV_CTL_IOCTL_HWDEP_INFO _IOR('U', 0x21, struct snd_hwdep_info) +#define SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE _IOR('U', 0x30, int) +#define SNDRV_CTL_IOCTL_PCM_INFO _IOWR('U', 0x31, struct snd_pcm_info) +#define SNDRV_CTL_IOCTL_PCM_PREFER_SUBDEVICE _IOW('U', 0x32, int) +#define SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE _IOWR('U', 0x40, int) +#define SNDRV_CTL_IOCTL_RAWMIDI_INFO _IOWR('U', 0x41, struct snd_rawmidi_info) +#define SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE _IOW('U', 0x42, int) +#define SNDRV_CTL_IOCTL_POWER _IOWR('U', 0xd0, int) +#define SNDRV_CTL_IOCTL_POWER_STATE _IOR('U', 0xd1, int) + +/* + * Read interface. + */ + +enum sndrv_ctl_event_type { + SNDRV_CTL_EVENT_ELEM = 0, + SNDRV_CTL_EVENT_LAST = SNDRV_CTL_EVENT_ELEM, +}; + +#define SNDRV_CTL_EVENT_MASK_VALUE (1<<0) /* element value was changed */ +#define SNDRV_CTL_EVENT_MASK_INFO (1<<1) /* element info was changed */ +#define SNDRV_CTL_EVENT_MASK_ADD (1<<2) /* element was added */ +#define SNDRV_CTL_EVENT_MASK_TLV (1<<3) /* element TLV tree was changed */ +#define SNDRV_CTL_EVENT_MASK_REMOVE (~0U) /* element was removed */ + +struct snd_ctl_event { + int type; /* event type - SNDRV_CTL_EVENT_* */ + union { + struct { + unsigned int mask; + struct snd_ctl_elem_id id; + } elem; + unsigned char data8[60]; + } data; +}; + +/* + * Control names + */ + +#define SNDRV_CTL_NAME_NONE "" +#define SNDRV_CTL_NAME_PLAYBACK "Playback " +#define SNDRV_CTL_NAME_CAPTURE "Capture " + +#define SNDRV_CTL_NAME_IEC958_NONE "" +#define SNDRV_CTL_NAME_IEC958_SWITCH "Switch" +#define SNDRV_CTL_NAME_IEC958_VOLUME "Volume" +#define SNDRV_CTL_NAME_IEC958_DEFAULT "Default" +#define SNDRV_CTL_NAME_IEC958_MASK "Mask" +#define SNDRV_CTL_NAME_IEC958_CON_MASK "Con Mask" +#define SNDRV_CTL_NAME_IEC958_PRO_MASK "Pro Mask" +#define SNDRV_CTL_NAME_IEC958_PCM_STREAM "PCM Stream" +#define SNDRV_CTL_NAME_IEC958(expl,direction,what) "IEC958 " expl SNDRV_CTL_NAME_##direction SNDRV_CTL_NAME_IEC958_##what + +#endif /* _UAPI__SOUND_ASOUND_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index eb4ccd73e826..35a33adefed2 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -84,6 +84,7 @@ tools/include/uapi/linux/mman.h tools/include/uapi/linux/perf_event.h tools/include/uapi/linux/sched.h tools/include/uapi/linux/stat.h +tools/include/uapi/sound/asound.h tools/include/linux/poison.h tools/include/linux/rbtree.h tools/include/linux/rbtree_augmented.h diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 0024525376bf..1e07c9b062de 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -7,6 +7,7 @@ include/uapi/linux/fcntl.h include/uapi/linux/perf_event.h include/uapi/linux/sched.h include/uapi/linux/stat.h +include/uapi/sound/asound.h include/linux/hash.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h From 2c3e96291164d46e58ea064b778d5f8f747d6c6a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 17:34:47 -0300 Subject: [PATCH 153/253] perf trace beautify ioctl: Beautify sound ioctl's 'cmd' arg This time we try a new approach, using a copy of uapi/sound/asound.h we auto generate the string tables, then include it in the ioctl cmd beautifier. This way either the sound developers will add the new commands to the tools/ copy, like is happening with other areas of tools/include/ (bpf.h comes to mind), or we'll be notified when building perf that our copy drifted. E.g.: # perf trace -p 22084 -e ioctl 2>&1 | head -5 0.000 ( 0.068 ms): alsa-sink-ALC3/22084 ioctl(fd: 49, cmd: SNDRV_PCM_HWSYNC, arg: 0x557f8d7fa0f0) = 0 0.344 ( 0.041 ms): alsa-sink-ALC3/22084 ioctl(fd: 46, cmd: SNDRV_CTL_ELEM_READ, arg: 0x7fe764018ee0) = 0 0.403 ( 0.011 ms): alsa-sink-ALC3/22084 ioctl(fd: 49, cmd: SNDRV_PCM_HWSYNC, arg: 0x557f8d7fa0f0) = 0 0.427 ( 0.009 ms): alsa-sink-ALC3/22084 ioctl(fd: 49, cmd: SNDRV_PCM_STATUS_EXT, arg: 0x7fe76c2e0b30) = 0 2.461 ( 0.042 ms): alsa-sink-ALC3/22084 ioctl(fd: 49, cmd: SNDRV_PCM_HWSYNC, arg: 0x557f8d7fa0f0) = 0 # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-8zuyf3e3u6jjcb2xzerw0kdi@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 26 ++++++++++++++++---- tools/perf/trace/beauty/ioctl.c | 28 ++++++++++++++++++++-- tools/perf/trace/beauty/sndrv_ctl_ioctl.sh | 8 +++++++ tools/perf/trace/beauty/sndrv_pcm_ioctl.sh | 8 +++++++ 4 files changed, 63 insertions(+), 7 deletions(-) create mode 100755 tools/perf/trace/beauty/sndrv_ctl_ioctl.sh create mode 100755 tools/perf/trace/beauty/sndrv_pcm_ioctl.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 78654995b99f..d49354d340fd 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -381,17 +381,31 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) -drm_ioctl_outdir := $(OUTPUT)trace/beauty/generated/ioctl -drm_ioctl_array := $(drm_ioctl_outdir)/drm_ioctl_array.c +beauty_ioctl_outdir := $(OUTPUT)trace/beauty/generated/ioctl +drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c drm_hdr_dir := $(srctree)/tools/include/uapi/drm drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh # Create output directory if not already present -_dummy := $(shell [ -d '$(drm_ioctl_outdir)' ] || mkdir -p '$(drm_ioctl_outdir)') +_dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)') $(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl) $(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@ +sndrv_ctl_ioctl_array := $(beauty_ioctl_outdir)/sndrv_ctl_ioctl_array.c +sndrv_ctl_hdr_dir := $(srctree)/tools/include/uapi/sound +sndrv_ctl_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh + +$(sndrv_ctl_ioctl_array): $(sndrv_ctl_hdr_dir)/asound.h $(sndrv_ctl_ioctl_tbl) + $(Q)$(SHELL) '$(sndrv_ctl_ioctl_tbl)' $(sndrv_ctl_hdr_dir) > $@ + +sndrv_pcm_ioctl_array := $(beauty_ioctl_outdir)/sndrv_pcm_ioctl_array.c +sndrv_pcm_hdr_dir := $(srctree)/tools/include/uapi/sound +sndrv_pcm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh + +$(sndrv_pcm_ioctl_array): $(sndrv_pcm_hdr_dir)/asound.h $(sndrv_pcm_ioctl_tbl) + $(Q)$(SHELL) '$(sndrv_pcm_ioctl_tbl)' $(sndrv_pcm_hdr_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -486,7 +500,7 @@ endif __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) -prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) +prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) $(sndrv_pcm_ioctl_array) $(sndrv_ctl_ioctl_array) $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -752,7 +766,9 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)util/intel-pt-decoder/inat-tables.c \ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ - $(OUTPUT)$(drm_ioctl_array) + $(OUTPUT)$(drm_ioctl_array) \ + $(OUTPUT)$(sndrv_ctl_ioctl_array) \ + $(OUTPUT)$(sndrv_pcm_ioctl_array) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 732489d141c4..ec5a5a499667 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -55,6 +55,28 @@ static size_t ioctl__scnprintf_drm_cmd(int nr, char *bf, size_t size) return scnprintf(bf, size, "(%#x, %#x)", 'd', nr); } +static size_t ioctl__scnprintf_sndrv_pcm_cmd(int nr, char *bf, size_t size) +{ +#include "trace/beauty/generated/ioctl/sndrv_pcm_ioctl_array.c" + static DEFINE_STRARRAY(sndrv_pcm_ioctl_cmds); + + if (nr < strarray__sndrv_pcm_ioctl_cmds.nr_entries && strarray__sndrv_pcm_ioctl_cmds.entries[nr] != NULL) + return scnprintf(bf, size, "SNDRV_PCM_%s", strarray__sndrv_pcm_ioctl_cmds.entries[nr]); + + return scnprintf(bf, size, "(%#x, %#x)", 'A', nr); +} + +static size_t ioctl__scnprintf_sndrv_ctl_cmd(int nr, char *bf, size_t size) +{ +#include "trace/beauty/generated/ioctl/sndrv_ctl_ioctl_array.c" + static DEFINE_STRARRAY(sndrv_ctl_ioctl_cmds); + + if (nr < strarray__sndrv_ctl_ioctl_cmds.nr_entries && strarray__sndrv_ctl_ioctl_cmds.entries[nr] != NULL) + return scnprintf(bf, size, "SNDRV_CTL_%s", strarray__sndrv_ctl_ioctl_cmds.entries[nr]); + + return scnprintf(bf, size, "(%#x, %#x)", 'U', nr); +} + static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) { int dir = _IOC_DIR(cmd), @@ -66,8 +88,10 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) int type; size_t (*scnprintf)(int nr, char *bf, size_t size); } ioctl_types[] = { /* Must be ordered by type */ - { .type = 'T', .scnprintf = ioctl__scnprintf_tty_cmd, }, - ['d' - 'T'] = { .type = 'd', .scnprintf = ioctl__scnprintf_drm_cmd, } + { .type = 'A', .scnprintf = ioctl__scnprintf_sndrv_pcm_cmd, }, + ['T' - 'A']= { .type = 'T', .scnprintf = ioctl__scnprintf_tty_cmd, }, + ['U' - 'A']= { .type = 'U', .scnprintf = ioctl__scnprintf_sndrv_ctl_cmd, }, + ['d' - 'A'] = { .type = 'd', .scnprintf = ioctl__scnprintf_drm_cmd, } }; const int nr_types = ARRAY_SIZE(ioctl_types); diff --git a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh new file mode 100755 index 000000000000..aad5ab130539 --- /dev/null +++ b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +sound_header_dir=$1 + +printf "static const char *sndrv_ctl_ioctl_cmds[] = {\n" +grep "^#define[\t ]\+SNDRV_CTL_IOCTL_" $sound_header_dir/asound.h | \ + sed -r 's/^#define +SNDRV_CTL_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.U., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g' +printf "};\n" diff --git a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh new file mode 100755 index 000000000000..b7e9ef6b2f55 --- /dev/null +++ b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +sound_header_dir=$1 + +printf "static const char *sndrv_pcm_ioctl_cmds[] = {\n" +grep "^#define[\t ]\+SNDRV_PCM_IOCTL_" $sound_header_dir/asound.h | \ + sed -r 's/^#define +SNDRV_PCM_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.A., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g' +printf "};\n" From 3ce97513f960b62fc9b2eee892162e225cd0f3a8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 16:45:32 -0300 Subject: [PATCH 154/253] tools include uapi: Grab a copy of linux/kvm.h We will use it to generate tables for beautifying ioctl's 'cmd' arg. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-nxwpq34hu6te1m2ra5m7o8n9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 1419 ++++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 1 + tools/perf/check-headers.sh | 1 + 3 files changed, 1421 insertions(+) create mode 100644 tools/include/uapi/linux/kvm.h diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h new file mode 100644 index 000000000000..6cd63c18708a --- /dev/null +++ b/tools/include/uapi/linux/kvm.h @@ -0,0 +1,1419 @@ +#ifndef __LINUX_KVM_H +#define __LINUX_KVM_H + +/* + * Userspace interface for /dev/kvm - kernel based virtual machine + * + * Note: you must update KVM_API_VERSION if you change this interface. + */ + +#include +#include +#include +#include + +#define KVM_API_VERSION 12 + +/* *** Deprecated interfaces *** */ + +#define KVM_TRC_SHIFT 16 + +#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) +#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) + +#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) +#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) +#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) + +#define KVM_TRC_HEAD_SIZE 12 +#define KVM_TRC_CYCLE_SIZE 8 +#define KVM_TRC_EXTRA_MAX 7 + +#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) +#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) +#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) +#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) +#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) +#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) +#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) +#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) +#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) +#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) +#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) +#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) +#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) +#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) +#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) +#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) +#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) +#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) +#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) +#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) +#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) +#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) +#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) +#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) + +struct kvm_user_trace_setup { + __u32 buf_size; + __u32 buf_nr; +}; + +#define __KVM_DEPRECATED_MAIN_W_0x06 \ + _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) +#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) +#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) + +#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) + +struct kvm_breakpoint { + __u32 enabled; + __u32 padding; + __u64 address; +}; + +struct kvm_debug_guest { + __u32 enabled; + __u32 pad; + struct kvm_breakpoint breakpoints[4]; + __u32 singlestep; +}; + +#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) + +/* *** End of deprecated interfaces *** */ + + +/* for KVM_CREATE_MEMORY_REGION */ +struct kvm_memory_region { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ +}; + +/* for KVM_SET_USER_MEMORY_REGION */ +struct kvm_userspace_memory_region { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; /* start of the userspace allocated memory */ +}; + +/* + * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, + * other bits are reserved for kvm internal use which are defined in + * include/linux/kvm_host.h. + */ +#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) +#define KVM_MEM_READONLY (1UL << 1) + +/* for KVM_IRQ_LINE */ +struct kvm_irq_level { + /* + * ACPI gsi notion of irq. + * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. + * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. + * For ARM: See Documentation/virtual/kvm/api.txt + */ + union { + __u32 irq; + __s32 status; + }; + __u32 level; +}; + + +struct kvm_irqchip { + __u32 chip_id; + __u32 pad; + union { + char dummy[512]; /* reserving space */ +#ifdef __KVM_HAVE_PIT + struct kvm_pic_state pic; +#endif +#ifdef __KVM_HAVE_IOAPIC + struct kvm_ioapic_state ioapic; +#endif + } chip; +}; + +/* for KVM_CREATE_PIT2 */ +struct kvm_pit_config { + __u32 flags; + __u32 pad[15]; +}; + +#define KVM_PIT_SPEAKER_DUMMY 1 + +struct kvm_s390_skeys { + __u64 start_gfn; + __u64 count; + __u64 skeydata_addr; + __u32 flags; + __u32 reserved[9]; +}; + +#define KVM_S390_CMMA_PEEK (1 << 0) + +/** + * kvm_s390_cmma_log - Used for CMMA migration. + * + * Used both for input and output. + * + * @start_gfn: Guest page number to start from. + * @count: Size of the result buffer. + * @flags: Control operation mode via KVM_S390_CMMA_* flags + * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty + * pages are still remaining. + * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set + * in the PGSTE. + * @values: Pointer to the values buffer. + * + * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. + */ +struct kvm_s390_cmma_log { + __u64 start_gfn; + __u32 count; + __u32 flags; + union { + __u64 remaining; + __u64 mask; + }; + __u64 values; +}; + +struct kvm_hyperv_exit { +#define KVM_EXIT_HYPERV_SYNIC 1 +#define KVM_EXIT_HYPERV_HCALL 2 + __u32 type; + union { + struct { + __u32 msr; + __u64 control; + __u64 evt_page; + __u64 msg_page; + } synic; + struct { + __u64 input; + __u64 result; + __u64 params[2]; + } hcall; + } u; +}; + +#define KVM_S390_GET_SKEYS_NONE 1 +#define KVM_S390_SKEYS_MAX 1048576 + +#define KVM_EXIT_UNKNOWN 0 +#define KVM_EXIT_EXCEPTION 1 +#define KVM_EXIT_IO 2 +#define KVM_EXIT_HYPERCALL 3 +#define KVM_EXIT_DEBUG 4 +#define KVM_EXIT_HLT 5 +#define KVM_EXIT_MMIO 6 +#define KVM_EXIT_IRQ_WINDOW_OPEN 7 +#define KVM_EXIT_SHUTDOWN 8 +#define KVM_EXIT_FAIL_ENTRY 9 +#define KVM_EXIT_INTR 10 +#define KVM_EXIT_SET_TPR 11 +#define KVM_EXIT_TPR_ACCESS 12 +#define KVM_EXIT_S390_SIEIC 13 +#define KVM_EXIT_S390_RESET 14 +#define KVM_EXIT_DCR 15 /* deprecated */ +#define KVM_EXIT_NMI 16 +#define KVM_EXIT_INTERNAL_ERROR 17 +#define KVM_EXIT_OSI 18 +#define KVM_EXIT_PAPR_HCALL 19 +#define KVM_EXIT_S390_UCONTROL 20 +#define KVM_EXIT_WATCHDOG 21 +#define KVM_EXIT_S390_TSCH 22 +#define KVM_EXIT_EPR 23 +#define KVM_EXIT_SYSTEM_EVENT 24 +#define KVM_EXIT_S390_STSI 25 +#define KVM_EXIT_IOAPIC_EOI 26 +#define KVM_EXIT_HYPERV 27 + +/* For KVM_EXIT_INTERNAL_ERROR */ +/* Emulate instruction failed. */ +#define KVM_INTERNAL_ERROR_EMULATION 1 +/* Encounter unexpected simultaneous exceptions. */ +#define KVM_INTERNAL_ERROR_SIMUL_EX 2 +/* Encounter unexpected vm-exit due to delivery event. */ +#define KVM_INTERNAL_ERROR_DELIVERY_EV 3 + +/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ +struct kvm_run { + /* in */ + __u8 request_interrupt_window; + __u8 immediate_exit; + __u8 padding1[6]; + + /* out */ + __u32 exit_reason; + __u8 ready_for_interrupt_injection; + __u8 if_flag; + __u16 flags; + + /* in (pre_kvm_run), out (post_kvm_run) */ + __u64 cr8; + __u64 apic_base; + +#ifdef __KVM_S390 + /* the processor status word for s390 */ + __u64 psw_mask; /* psw upper half */ + __u64 psw_addr; /* psw lower half */ +#endif + union { + /* KVM_EXIT_UNKNOWN */ + struct { + __u64 hardware_exit_reason; + } hw; + /* KVM_EXIT_FAIL_ENTRY */ + struct { + __u64 hardware_entry_failure_reason; + } fail_entry; + /* KVM_EXIT_EXCEPTION */ + struct { + __u32 exception; + __u32 error_code; + } ex; + /* KVM_EXIT_IO */ + struct { +#define KVM_EXIT_IO_IN 0 +#define KVM_EXIT_IO_OUT 1 + __u8 direction; + __u8 size; /* bytes */ + __u16 port; + __u32 count; + __u64 data_offset; /* relative to kvm_run start */ + } io; + /* KVM_EXIT_DEBUG */ + struct { + struct kvm_debug_exit_arch arch; + } debug; + /* KVM_EXIT_MMIO */ + struct { + __u64 phys_addr; + __u8 data[8]; + __u32 len; + __u8 is_write; + } mmio; + /* KVM_EXIT_HYPERCALL */ + struct { + __u64 nr; + __u64 args[6]; + __u64 ret; + __u32 longmode; + __u32 pad; + } hypercall; + /* KVM_EXIT_TPR_ACCESS */ + struct { + __u64 rip; + __u32 is_write; + __u32 pad; + } tpr_access; + /* KVM_EXIT_S390_SIEIC */ + struct { + __u8 icptcode; + __u16 ipa; + __u32 ipb; + } s390_sieic; + /* KVM_EXIT_S390_RESET */ +#define KVM_S390_RESET_POR 1 +#define KVM_S390_RESET_CLEAR 2 +#define KVM_S390_RESET_SUBSYSTEM 4 +#define KVM_S390_RESET_CPU_INIT 8 +#define KVM_S390_RESET_IPL 16 + __u64 s390_reset_flags; + /* KVM_EXIT_S390_UCONTROL */ + struct { + __u64 trans_exc_code; + __u32 pgm_code; + } s390_ucontrol; + /* KVM_EXIT_DCR (deprecated) */ + struct { + __u32 dcrn; + __u32 data; + __u8 is_write; + } dcr; + /* KVM_EXIT_INTERNAL_ERROR */ + struct { + __u32 suberror; + /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ + __u32 ndata; + __u64 data[16]; + } internal; + /* KVM_EXIT_OSI */ + struct { + __u64 gprs[32]; + } osi; + /* KVM_EXIT_PAPR_HCALL */ + struct { + __u64 nr; + __u64 ret; + __u64 args[9]; + } papr_hcall; + /* KVM_EXIT_S390_TSCH */ + struct { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; + __u32 ipb; + __u8 dequeued; + } s390_tsch; + /* KVM_EXIT_EPR */ + struct { + __u32 epr; + } epr; + /* KVM_EXIT_SYSTEM_EVENT */ + struct { +#define KVM_SYSTEM_EVENT_SHUTDOWN 1 +#define KVM_SYSTEM_EVENT_RESET 2 +#define KVM_SYSTEM_EVENT_CRASH 3 + __u32 type; + __u64 flags; + } system_event; + /* KVM_EXIT_S390_STSI */ + struct { + __u64 addr; + __u8 ar; + __u8 reserved; + __u8 fc; + __u8 sel1; + __u16 sel2; + } s390_stsi; + /* KVM_EXIT_IOAPIC_EOI */ + struct { + __u8 vector; + } eoi; + /* KVM_EXIT_HYPERV */ + struct kvm_hyperv_exit hyperv; + /* Fix the size of the union. */ + char padding[256]; + }; + + /* + * shared registers between kvm and userspace. + * kvm_valid_regs specifies the register classes set by the host + * kvm_dirty_regs specified the register classes dirtied by userspace + * struct kvm_sync_regs is architecture specific, as well as the + * bits for kvm_valid_regs and kvm_dirty_regs + */ + __u64 kvm_valid_regs; + __u64 kvm_dirty_regs; + union { + struct kvm_sync_regs regs; + char padding[2048]; + } s; +}; + +/* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */ + +struct kvm_coalesced_mmio_zone { + __u64 addr; + __u32 size; + __u32 pad; +}; + +struct kvm_coalesced_mmio { + __u64 phys_addr; + __u32 len; + __u32 pad; + __u8 data[8]; +}; + +struct kvm_coalesced_mmio_ring { + __u32 first, last; + struct kvm_coalesced_mmio coalesced_mmio[0]; +}; + +#define KVM_COALESCED_MMIO_MAX \ + ((PAGE_SIZE - sizeof(struct kvm_coalesced_mmio_ring)) / \ + sizeof(struct kvm_coalesced_mmio)) + +/* for KVM_TRANSLATE */ +struct kvm_translation { + /* in */ + __u64 linear_address; + + /* out */ + __u64 physical_address; + __u8 valid; + __u8 writeable; + __u8 usermode; + __u8 pad[5]; +}; + +/* for KVM_S390_MEM_OP */ +struct kvm_s390_mem_op { + /* in */ + __u64 gaddr; /* the guest address */ + __u64 flags; /* flags */ + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + __u8 ar; /* the access register number */ + __u8 reserved[31]; /* should be set to 0 */ +}; +/* types for kvm_s390_mem_op->op */ +#define KVM_S390_MEMOP_LOGICAL_READ 0 +#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +/* flags for kvm_s390_mem_op->flags */ +#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) + +/* for KVM_INTERRUPT */ +struct kvm_interrupt { + /* in */ + __u32 irq; +}; + +/* for KVM_GET_DIRTY_LOG */ +struct kvm_dirty_log { + __u32 slot; + __u32 padding1; + union { + void __user *dirty_bitmap; /* one bit per page */ + __u64 padding2; + }; +}; + +/* for KVM_SET_SIGNAL_MASK */ +struct kvm_signal_mask { + __u32 len; + __u8 sigset[0]; +}; + +/* for KVM_TPR_ACCESS_REPORTING */ +struct kvm_tpr_access_ctl { + __u32 enabled; + __u32 flags; + __u32 reserved[8]; +}; + +/* for KVM_SET_VAPIC_ADDR */ +struct kvm_vapic_addr { + __u64 vapic_addr; +}; + +/* for KVM_SET_MP_STATE */ + +/* not all states are valid on all architectures */ +#define KVM_MP_STATE_RUNNABLE 0 +#define KVM_MP_STATE_UNINITIALIZED 1 +#define KVM_MP_STATE_INIT_RECEIVED 2 +#define KVM_MP_STATE_HALTED 3 +#define KVM_MP_STATE_SIPI_RECEIVED 4 +#define KVM_MP_STATE_STOPPED 5 +#define KVM_MP_STATE_CHECK_STOP 6 +#define KVM_MP_STATE_OPERATING 7 +#define KVM_MP_STATE_LOAD 8 + +struct kvm_mp_state { + __u32 mp_state; +}; + +struct kvm_s390_psw { + __u64 mask; + __u64 addr; +}; + +/* valid values for type in kvm_s390_interrupt */ +#define KVM_S390_SIGP_STOP 0xfffe0000u +#define KVM_S390_PROGRAM_INT 0xfffe0001u +#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +#define KVM_S390_RESTART 0xfffe0003u +#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u +#define KVM_S390_MCHK 0xfffe1000u +#define KVM_S390_INT_CLOCK_COMP 0xffff1004u +#define KVM_S390_INT_CPU_TIMER 0xffff1005u +#define KVM_S390_INT_VIRTIO 0xffff2603u +#define KVM_S390_INT_SERVICE 0xffff2401u +#define KVM_S390_INT_EMERGENCY 0xffff1201u +#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +/* Anything below 0xfffe0000u is taken by INT_IO */ +#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ + (((schid)) | \ + ((ssid) << 16) | \ + ((cssid) << 18) | \ + ((ai) << 26)) +#define KVM_S390_INT_IO_MIN 0x00000000u +#define KVM_S390_INT_IO_MAX 0xfffdffffu +#define KVM_S390_INT_IO_AI_MASK 0x04000000u + + +struct kvm_s390_interrupt { + __u32 type; + __u32 parm; + __u64 parm64; +}; + +struct kvm_s390_io_info { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; +}; + +struct kvm_s390_ext_info { + __u32 ext_params; + __u32 pad; + __u64 ext_params2; +}; + +struct kvm_s390_pgm_info { + __u64 trans_exc_code; + __u64 mon_code; + __u64 per_address; + __u32 data_exc_code; + __u16 code; + __u16 mon_class_nr; + __u8 per_code; + __u8 per_atmid; + __u8 exc_access_id; + __u8 per_access_id; + __u8 op_access_id; +#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 + __u8 flags; + __u8 pad[2]; +}; + +struct kvm_s390_prefix_info { + __u32 address; +}; + +struct kvm_s390_extcall_info { + __u16 code; +}; + +struct kvm_s390_emerg_info { + __u16 code; +}; + +#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 +struct kvm_s390_stop_info { + __u32 flags; +}; + +struct kvm_s390_mchk_info { + __u64 cr14; + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 pad; + __u8 fixed_logout[16]; +}; + +struct kvm_s390_irq { + __u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_stop_info stop; + struct kvm_s390_mchk_info mchk; + char reserved[64]; + } u; +}; + +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; + __u32 len; + __u32 reserved[4]; +}; + +/* for KVM_SET_GUEST_DEBUG */ + +#define KVM_GUESTDBG_ENABLE 0x00000001 +#define KVM_GUESTDBG_SINGLESTEP 0x00000002 + +struct kvm_guest_debug { + __u32 control; + __u32 pad; + struct kvm_guest_debug_arch arch; +}; + +enum { + kvm_ioeventfd_flag_nr_datamatch, + kvm_ioeventfd_flag_nr_pio, + kvm_ioeventfd_flag_nr_deassign, + kvm_ioeventfd_flag_nr_virtio_ccw_notify, + kvm_ioeventfd_flag_nr_fast_mmio, + kvm_ioeventfd_flag_nr_max, +}; + +#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) +#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) +#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) +#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ + (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) + +#define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) + +struct kvm_ioeventfd { + __u64 datamatch; + __u64 addr; /* legal pio/mmio address */ + __u32 len; /* 1, 2, 4, or 8 bytes; or 0 to ignore length */ + __s32 fd; + __u32 flags; + __u8 pad[36]; +}; + +/* for KVM_ENABLE_CAP */ +struct kvm_enable_cap { + /* in */ + __u32 cap; + __u32 flags; + __u64 args[4]; + __u8 pad[64]; +}; + +/* for KVM_PPC_GET_PVINFO */ + +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + +struct kvm_ppc_pvinfo { + /* out */ + __u32 flags; + __u32 hcall[4]; + __u8 pad[108]; +}; + +/* for KVM_PPC_GET_SMMU_INFO */ +#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 + +struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +}; + +struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +#define KVM_PPC_1T_SEGMENTS 0x00000002 + +struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u32 pad; + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; +}; + +#define KVMIO 0xAE + +/* machine type bits, to be used as argument to KVM_CREATE_VM */ +#define KVM_VM_S390_UCONTROL 1 + +/* on ppc, 0 indicate default, 1 should force HV and 2 PR */ +#define KVM_VM_PPC_HV 1 +#define KVM_VM_PPC_PR 2 + +/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ +#define KVM_VM_MIPS_TE 0 +#define KVM_VM_MIPS_VZ 1 + +#define KVM_S390_SIE_PAGE_OFFSET 1 + +/* + * ioctls for /dev/kvm fds: + */ +#define KVM_GET_API_VERSION _IO(KVMIO, 0x00) +#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ +#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) + +#define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06) +/* + * Check if a kvm extension is available. Argument is extension number, + * return is 1 (yes) or 0 (no, sorry). + */ +#define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03) +/* + * Get size for mmap(vcpu_fd) + */ +#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ +#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) +#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 +#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 +#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 +#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) + +/* + * Extension capability list. + */ +#define KVM_CAP_IRQCHIP 0 +#define KVM_CAP_HLT 1 +#define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2 +#define KVM_CAP_USER_MEMORY 3 +#define KVM_CAP_SET_TSS_ADDR 4 +#define KVM_CAP_VAPIC 6 +#define KVM_CAP_EXT_CPUID 7 +#define KVM_CAP_CLOCKSOURCE 8 +#define KVM_CAP_NR_VCPUS 9 /* returns recommended max vcpus per vm */ +#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ +#define KVM_CAP_PIT 11 +#define KVM_CAP_NOP_IO_DELAY 12 +#define KVM_CAP_PV_MMU 13 +#define KVM_CAP_MP_STATE 14 +#define KVM_CAP_COALESCED_MMIO 15 +#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ +#define KVM_CAP_IOMMU 18 +/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ +#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 +#define KVM_CAP_USER_NMI 22 +#ifdef __KVM_HAVE_GUEST_DEBUG +#define KVM_CAP_SET_GUEST_DEBUG 23 +#endif +#ifdef __KVM_HAVE_PIT +#define KVM_CAP_REINJECT_CONTROL 24 +#endif +#define KVM_CAP_IRQ_ROUTING 25 +#define KVM_CAP_IRQ_INJECT_STATUS 26 +#define KVM_CAP_ASSIGN_DEV_IRQ 29 +/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ +#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 +#ifdef __KVM_HAVE_MCE +#define KVM_CAP_MCE 31 +#endif +#define KVM_CAP_IRQFD 32 +#ifdef __KVM_HAVE_PIT +#define KVM_CAP_PIT2 33 +#endif +#define KVM_CAP_SET_BOOT_CPU_ID 34 +#ifdef __KVM_HAVE_PIT_STATE2 +#define KVM_CAP_PIT_STATE2 35 +#endif +#define KVM_CAP_IOEVENTFD 36 +#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 +#ifdef __KVM_HAVE_XEN_HVM +#define KVM_CAP_XEN_HVM 38 +#endif +#define KVM_CAP_ADJUST_CLOCK 39 +#define KVM_CAP_INTERNAL_ERROR_DATA 40 +#ifdef __KVM_HAVE_VCPU_EVENTS +#define KVM_CAP_VCPU_EVENTS 41 +#endif +#define KVM_CAP_S390_PSW 42 +#define KVM_CAP_PPC_SEGSTATE 43 +#define KVM_CAP_HYPERV 44 +#define KVM_CAP_HYPERV_VAPIC 45 +#define KVM_CAP_HYPERV_SPIN 46 +#define KVM_CAP_PCI_SEGMENT 47 +#define KVM_CAP_PPC_PAIRED_SINGLES 48 +#define KVM_CAP_INTR_SHADOW 49 +#ifdef __KVM_HAVE_DEBUGREGS +#define KVM_CAP_DEBUGREGS 50 +#endif +#define KVM_CAP_X86_ROBUST_SINGLESTEP 51 +#define KVM_CAP_PPC_OSI 52 +#define KVM_CAP_PPC_UNSET_IRQ 53 +#define KVM_CAP_ENABLE_CAP 54 +#ifdef __KVM_HAVE_XSAVE +#define KVM_CAP_XSAVE 55 +#endif +#ifdef __KVM_HAVE_XCRS +#define KVM_CAP_XCRS 56 +#endif +#define KVM_CAP_PPC_GET_PVINFO 57 +#define KVM_CAP_PPC_IRQ_LEVEL 58 +#define KVM_CAP_ASYNC_PF 59 +#define KVM_CAP_TSC_CONTROL 60 +#define KVM_CAP_GET_TSC_KHZ 61 +#define KVM_CAP_PPC_BOOKE_SREGS 62 +#define KVM_CAP_SPAPR_TCE 63 +#define KVM_CAP_PPC_SMT 64 +#define KVM_CAP_PPC_RMA 65 +#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ +#define KVM_CAP_PPC_HIOR 67 +#define KVM_CAP_PPC_PAPR 68 +#define KVM_CAP_SW_TLB 69 +#define KVM_CAP_ONE_REG 70 +#define KVM_CAP_S390_GMAP 71 +#define KVM_CAP_TSC_DEADLINE_TIMER 72 +#define KVM_CAP_S390_UCONTROL 73 +#define KVM_CAP_SYNC_REGS 74 +#define KVM_CAP_PCI_2_3 75 +#define KVM_CAP_KVMCLOCK_CTRL 76 +#define KVM_CAP_SIGNAL_MSI 77 +#define KVM_CAP_PPC_GET_SMMU_INFO 78 +#define KVM_CAP_S390_COW 79 +#define KVM_CAP_PPC_ALLOC_HTAB 80 +#define KVM_CAP_READONLY_MEM 81 +#define KVM_CAP_IRQFD_RESAMPLE 82 +#define KVM_CAP_PPC_BOOKE_WATCHDOG 83 +#define KVM_CAP_PPC_HTAB_FD 84 +#define KVM_CAP_S390_CSS_SUPPORT 85 +#define KVM_CAP_PPC_EPR 86 +#define KVM_CAP_ARM_PSCI 87 +#define KVM_CAP_ARM_SET_DEVICE_ADDR 88 +#define KVM_CAP_DEVICE_CTRL 89 +#define KVM_CAP_IRQ_MPIC 90 +#define KVM_CAP_PPC_RTAS 91 +#define KVM_CAP_IRQ_XICS 92 +#define KVM_CAP_ARM_EL1_32BIT 93 +#define KVM_CAP_SPAPR_MULTITCE 94 +#define KVM_CAP_EXT_EMUL_CPUID 95 +#define KVM_CAP_HYPERV_TIME 96 +#define KVM_CAP_IOAPIC_POLARITY_IGNORED 97 +#define KVM_CAP_ENABLE_CAP_VM 98 +#define KVM_CAP_S390_IRQCHIP 99 +#define KVM_CAP_IOEVENTFD_NO_LENGTH 100 +#define KVM_CAP_VM_ATTRIBUTES 101 +#define KVM_CAP_ARM_PSCI_0_2 102 +#define KVM_CAP_PPC_FIXUP_HCALL 103 +#define KVM_CAP_PPC_ENABLE_HCALL 104 +#define KVM_CAP_CHECK_EXTENSION_VM 105 +#define KVM_CAP_S390_USER_SIGP 106 +#define KVM_CAP_S390_VECTOR_REGISTERS 107 +#define KVM_CAP_S390_MEM_OP 108 +#define KVM_CAP_S390_USER_STSI 109 +#define KVM_CAP_S390_SKEYS 110 +#define KVM_CAP_MIPS_FPU 111 +#define KVM_CAP_MIPS_MSA 112 +#define KVM_CAP_S390_INJECT_IRQ 113 +#define KVM_CAP_S390_IRQ_STATE 114 +#define KVM_CAP_PPC_HWRNG 115 +#define KVM_CAP_DISABLE_QUIRKS 116 +#define KVM_CAP_X86_SMM 117 +#define KVM_CAP_MULTI_ADDRESS_SPACE 118 +#define KVM_CAP_GUEST_DEBUG_HW_BPS 119 +#define KVM_CAP_GUEST_DEBUG_HW_WPS 120 +#define KVM_CAP_SPLIT_IRQCHIP 121 +#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 +#define KVM_CAP_HYPERV_SYNIC 123 +#define KVM_CAP_S390_RI 124 +#define KVM_CAP_SPAPR_TCE_64 125 +#define KVM_CAP_ARM_PMU_V3 126 +#define KVM_CAP_VCPU_ATTRIBUTES 127 +#define KVM_CAP_MAX_VCPU_ID 128 +#define KVM_CAP_X2APIC_API 129 +#define KVM_CAP_S390_USER_INSTR0 130 +#define KVM_CAP_MSI_DEVID 131 +#define KVM_CAP_PPC_HTM 132 +#define KVM_CAP_SPAPR_RESIZE_HPT 133 +#define KVM_CAP_PPC_MMU_RADIX 134 +#define KVM_CAP_PPC_MMU_HASH_V3 135 +#define KVM_CAP_IMMEDIATE_EXIT 136 +#define KVM_CAP_MIPS_VZ 137 +#define KVM_CAP_MIPS_TE 138 +#define KVM_CAP_MIPS_64BIT 139 +#define KVM_CAP_S390_GS 140 +#define KVM_CAP_S390_AIS 141 +#define KVM_CAP_SPAPR_TCE_VFIO 142 +#define KVM_CAP_X86_GUEST_MWAIT 143 +#define KVM_CAP_ARM_USER_IRQ 144 +#define KVM_CAP_S390_CMMA_MIGRATION 145 +#define KVM_CAP_PPC_FWNMI 146 +#define KVM_CAP_PPC_SMT_POSSIBLE 147 +#define KVM_CAP_HYPERV_SYNIC2 148 +#define KVM_CAP_HYPERV_VP_INDEX 149 + +#ifdef KVM_CAP_IRQ_ROUTING + +struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +}; + +struct kvm_irq_routing_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + union { + __u32 pad; + __u32 devid; + }; +}; + +struct kvm_irq_routing_s390_adapter { + __u64 ind_addr; + __u64 summary_addr; + __u64 ind_offset; + __u32 summary_offset; + __u32 adapter_id; +}; + +struct kvm_irq_routing_hv_sint { + __u32 vcpu; + __u32 sint; +}; + +/* gsi routing entry types */ +#define KVM_IRQ_ROUTING_IRQCHIP 1 +#define KVM_IRQ_ROUTING_MSI 2 +#define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_HV_SINT 4 + +struct kvm_irq_routing_entry { + __u32 gsi; + __u32 type; + __u32 flags; + __u32 pad; + union { + struct kvm_irq_routing_irqchip irqchip; + struct kvm_irq_routing_msi msi; + struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; + __u32 pad[8]; + } u; +}; + +struct kvm_irq_routing { + __u32 nr; + __u32 flags; + struct kvm_irq_routing_entry entries[0]; +}; + +#endif + +#ifdef KVM_CAP_MCE +/* x86 MCE */ +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; +#endif + +#ifdef KVM_CAP_XEN_HVM +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; +#endif + +#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) +/* + * Available with KVM_CAP_IRQFD_RESAMPLE + * + * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies + * the irqfd to operate in resampling mode for level triggered interrupt + * emulation. See Documentation/virtual/kvm/api.txt. + */ +#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) + +struct kvm_irqfd { + __u32 fd; + __u32 gsi; + __u32 flags; + __u32 resamplefd; + __u8 pad[16]; +}; + +/* For KVM_CAP_ADJUST_CLOCK */ + +/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ +#define KVM_CLOCK_TSC_STABLE 2 + +struct kvm_clock_data { + __u64 clock; + __u32 flags; + __u32 pad[9]; +}; + +/* For KVM_CAP_SW_TLB */ + +#define KVM_MMU_FSL_BOOKE_NOHV 0 +#define KVM_MMU_FSL_BOOKE_HV 1 + +struct kvm_config_tlb { + __u64 params; + __u64 array; + __u32 mmu_type; + __u32 array_len; +}; + +struct kvm_dirty_tlb { + __u64 bitmap; + __u32 num_dirty; +}; + +/* Available with KVM_CAP_ONE_REG */ + +#define KVM_REG_ARCH_MASK 0xff00000000000000ULL +#define KVM_REG_GENERIC 0x0000000000000000ULL + +/* + * Architecture specific registers are to be defined in arch headers and + * ORed with the arch identifier. + */ +#define KVM_REG_PPC 0x1000000000000000ULL +#define KVM_REG_X86 0x2000000000000000ULL +#define KVM_REG_IA64 0x3000000000000000ULL +#define KVM_REG_ARM 0x4000000000000000ULL +#define KVM_REG_S390 0x5000000000000000ULL +#define KVM_REG_ARM64 0x6000000000000000ULL +#define KVM_REG_MIPS 0x7000000000000000ULL + +#define KVM_REG_SIZE_SHIFT 52 +#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL +#define KVM_REG_SIZE_U8 0x0000000000000000ULL +#define KVM_REG_SIZE_U16 0x0010000000000000ULL +#define KVM_REG_SIZE_U32 0x0020000000000000ULL +#define KVM_REG_SIZE_U64 0x0030000000000000ULL +#define KVM_REG_SIZE_U128 0x0040000000000000ULL +#define KVM_REG_SIZE_U256 0x0050000000000000ULL +#define KVM_REG_SIZE_U512 0x0060000000000000ULL +#define KVM_REG_SIZE_U1024 0x0070000000000000ULL + +struct kvm_reg_list { + __u64 n; /* number of regs */ + __u64 reg[0]; +}; + +struct kvm_one_reg { + __u64 id; + __u64 addr; +}; + +#define KVM_MSI_VALID_DEVID (1U << 0) +struct kvm_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 flags; + __u32 devid; + __u8 pad[12]; +}; + +struct kvm_arm_device_addr { + __u64 id; + __u64 addr; +}; + +/* + * Device control API, available with KVM_CAP_DEVICE_CTRL + */ +#define KVM_CREATE_DEVICE_TEST 1 + +struct kvm_create_device { + __u32 type; /* in: KVM_DEV_TYPE_xxx */ + __u32 fd; /* out: device handle */ + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ +}; + +struct kvm_device_attr { + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __u64 addr; /* userspace address of attr data */ +}; + +#define KVM_DEV_VFIO_GROUP 1 +#define KVM_DEV_VFIO_GROUP_ADD 1 +#define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 + +enum kvm_device_type { + KVM_DEV_TYPE_FSL_MPIC_20 = 1, +#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20 + KVM_DEV_TYPE_FSL_MPIC_42, +#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42 + KVM_DEV_TYPE_XICS, +#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS + KVM_DEV_TYPE_VFIO, +#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO + KVM_DEV_TYPE_ARM_VGIC_V2, +#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 + KVM_DEV_TYPE_FLIC, +#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC + KVM_DEV_TYPE_ARM_VGIC_V3, +#define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 + KVM_DEV_TYPE_ARM_VGIC_ITS, +#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS + KVM_DEV_TYPE_MAX, +}; + +struct kvm_vfio_spapr_tce { + __s32 groupfd; + __s32 tablefd; +}; + +/* + * ioctls for VM fds + */ +#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) +/* + * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns + * a vcpu fd. + */ +#define KVM_CREATE_VCPU _IO(KVMIO, 0x41) +#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) +/* KVM_SET_MEMORY_ALIAS is obsolete: */ +#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) +#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) +#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) +#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ + struct kvm_userspace_memory_region) +#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) +#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) + +/* enable ucontrol for s390 */ +struct kvm_s390_ucas_mapping { + __u64 user_addr; + __u64 vcpu_addr; + __u64 length; +}; +#define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) +#define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) +#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) + +/* Device model IOC */ +#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) +#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) +#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) +#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) +#define KVM_CREATE_PIT _IO(KVMIO, 0x64) +#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) +#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) +#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level) +#define KVM_REGISTER_COALESCED_MMIO \ + _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) +#define KVM_UNREGISTER_COALESCED_MMIO \ + _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) +#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ + struct kvm_assigned_pci_dev) +#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) +/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ +#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 +#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) +#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) +#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ + struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ + struct kvm_assigned_msix_nr) +#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ + struct kvm_assigned_msix_entry) +#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) +#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) +#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) +#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) +#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) +#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) +#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) +/* Available with KVM_CAP_PIT_STATE2 */ +#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) +#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) +/* Available with KVM_CAP_PPC_GET_PVINFO */ +#define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) +/* Available with KVM_CAP_TSC_CONTROL */ +#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) +#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) +/* Available with KVM_CAP_PCI_2_3 */ +#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ + struct kvm_assigned_pci_dev) +/* Available with KVM_CAP_SIGNAL_MSI */ +#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) +/* Available with KVM_CAP_PPC_GET_SMMU_INFO */ +#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info) +/* Available with KVM_CAP_PPC_ALLOC_HTAB */ +#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32) +#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) +#define KVM_CREATE_SPAPR_TCE_64 _IOW(KVMIO, 0xa8, \ + struct kvm_create_spapr_tce_64) +/* Available with KVM_CAP_RMA */ +#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_PPC_HTAB_FD */ +#define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) +/* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ +#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) +/* Available with KVM_CAP_PPC_RTAS */ +#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) +/* Available with KVM_CAP_SPAPR_RESIZE_HPT */ +#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt) +#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt) +/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */ +#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) +/* Available with KVM_CAP_PPC_RADIX_MMU */ +#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) + +/* ioctl for vm fd */ +#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) + +/* ioctls for fds returned by KVM_CREATE_DEVICE */ +#define KVM_SET_DEVICE_ATTR _IOW(KVMIO, 0xe1, struct kvm_device_attr) +#define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr) +#define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr) + +/* + * ioctls for vcpu fds + */ +#define KVM_RUN _IO(KVMIO, 0x80) +#define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs) +#define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs) +#define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs) +#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) +#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) +#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) +/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ +#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 +#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) +#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) +#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) +#define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask) +#define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu) +#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) +#define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state) +#define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state) +#define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2) +#define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2) +/* Available with KVM_CAP_VAPIC */ +#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) +/* Available with KVM_CAP_VAPIC */ +#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) +/* valid for virtual machine (for floating interrupt)_and_ vcpu */ +#define KVM_S390_INTERRUPT _IOW(KVMIO, 0x94, struct kvm_s390_interrupt) +/* store status for s390 */ +#define KVM_S390_STORE_STATUS_NOADDR (-1ul) +#define KVM_S390_STORE_STATUS_PREFIXED (-2ul) +#define KVM_S390_STORE_STATUS _IOW(KVMIO, 0x95, unsigned long) +/* initial ipl psw for s390 */ +#define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw) +/* initial reset for s390 */ +#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) +#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) +#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) +/* Available with KVM_CAP_USER_NMI */ +#define KVM_NMI _IO(KVMIO, 0x9a) +/* Available with KVM_CAP_SET_GUEST_DEBUG */ +#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) +/* MCE for x86 */ +#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) +#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) +#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) +/* Available with KVM_CAP_VCPU_EVENTS */ +#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) +#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) +/* Available with KVM_CAP_DEBUGREGS */ +#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) +#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) +/* + * vcpu version available with KVM_ENABLE_CAP + * vm version available with KVM_CAP_ENABLE_CAP_VM + */ +#define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) +/* Available with KVM_CAP_XSAVE */ +#define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave) +#define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave) +/* Available with KVM_CAP_XCRS */ +#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) +#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) +/* Available with KVM_CAP_SW_TLB */ +#define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb) +/* Available with KVM_CAP_ONE_REG */ +#define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg) +#define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) +/* VM is being stopped by host */ +#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) +#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) +#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) +/* Available with KVM_CAP_S390_MEM_OP */ +#define KVM_S390_MEM_OP _IOW(KVMIO, 0xb1, struct kvm_s390_mem_op) +/* Available with KVM_CAP_S390_SKEYS */ +#define KVM_S390_GET_SKEYS _IOW(KVMIO, 0xb2, struct kvm_s390_skeys) +#define KVM_S390_SET_SKEYS _IOW(KVMIO, 0xb3, struct kvm_s390_skeys) +/* Available with KVM_CAP_S390_INJECT_IRQ */ +#define KVM_S390_IRQ _IOW(KVMIO, 0xb4, struct kvm_s390_irq) +/* Available with KVM_CAP_S390_IRQ_STATE */ +#define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state) +#define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) +/* Available with KVM_CAP_X86_SMM */ +#define KVM_SMI _IO(KVMIO, 0xb7) +/* Available with KVM_CAP_S390_CMMA_MIGRATION */ +#define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log) +#define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log) + +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) +#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) + +struct kvm_assigned_pci_dev { + __u32 assigned_dev_id; + __u32 busnr; + __u32 devfn; + __u32 flags; + __u32 segnr; + union { + __u32 reserved[11]; + }; +}; + +#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) + +#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) + +#define KVM_DEV_IRQ_HOST_MASK 0x00ff +#define KVM_DEV_IRQ_GUEST_MASK 0xff00 + +struct kvm_assigned_irq { + __u32 assigned_dev_id; + __u32 host_irq; /* ignored (legacy field) */ + __u32 guest_irq; + __u32 flags; + union { + __u32 reserved[12]; + }; +}; + +struct kvm_assigned_msix_nr { + __u32 assigned_dev_id; + __u16 entry_nr; + __u16 padding; +}; + +#define KVM_MAX_MSIX_PER_DEV 256 +struct kvm_assigned_msix_entry { + __u32 assigned_dev_id; + __u32 gsi; + __u16 entry; /* The index of entry in the MSI-X table */ + __u16 padding[3]; +}; + +#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +/* Available with KVM_CAP_ARM_USER_IRQ */ + +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + +#endif /* __LINUX_KVM_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 35a33adefed2..513f52a484fd 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -80,6 +80,7 @@ tools/include/uapi/linux/bpf.h tools/include/uapi/linux/bpf_common.h tools/include/uapi/linux/fcntl.h tools/include/uapi/linux/hw_breakpoint.h +tools/include/uapi/linux/kvm.h tools/include/uapi/linux/mman.h tools/include/uapi/linux/perf_event.h tools/include/uapi/linux/sched.h diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 1e07c9b062de..1f009506ff63 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -4,6 +4,7 @@ HEADERS=' include/uapi/drm/drm.h include/uapi/drm/i915_drm.h include/uapi/linux/fcntl.h +include/uapi/linux/kvm.h include/uapi/linux/perf_event.h include/uapi/linux/sched.h include/uapi/linux/stat.h From 45717b7fb7e59285927170cf7fff233e0bbeeaca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 17:34:47 -0300 Subject: [PATCH 155/253] perf trace beautify ioctl: Beautify KVM ioctl's 'cmd' arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also trying a new approach, using a copy of uapi/linux/kvm.h we auto generate the string tables, then include it in the ioctl cmd beautifier. This way either the KVM developers will add the new commands to the tools/ copy, like is happening with other areas of tools/include/ (bpf.h comes to mind), or we'll be notified when building perf that our copy drifted. E.g., a tracing a process and its threads, but would work for system wide as well, just drop that '-p 21238', to see ioctls for DRM, tty, sound, etc: # perf trace -e ioctl -p 21238 2>&1 | grep -v KVM_RUN 7801.536 ( 0.003 ms): CPU 0/KVM/21276 ioctl(fd: 13, cmd: KVM_IRQ_LINE_STATUS, arg: 0x7f484c6c73c0) = 0 7801.715 ( 0.001 ms): CPU 0/KVM/21276 ioctl(fd: 13, cmd: KVM_IRQ_LINE_STATUS, arg: 0x7f484c6c73e0) = 0 11001.051 ( 0.008 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SIGNAL_MSI, arg: 0x563c83379d70) = 1 11001.225 ( 0.005 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SIGNAL_MSI, arg: 0x563c83379d70) = 1 10750.377 (249.963 ms): CPU 0/KVM/21276 ... [continued]: ioctl()) = 0 11011.780 ( 0.015 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SIGNAL_MSI, arg: 0x563c83379d90) = 1 11011.929 ( 0.005 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SIGNAL_MSI, arg: 0x7fff053e1000) = 1 11012.090 ( 0.004 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SIGNAL_MSI, arg: 0x563c83379d70) = 1 11023.127 ( 0.020 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SIGNAL_MSI, arg: 0x563c83379d90) = 1 11000.483 (249.807 ms): CPU 0/KVM/21276 ... [continued]: ioctl()) = 0 25620.877 ( 0.042 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IRQ_LINE_STATUS, arg: 0x7fff053e1080) = 0 25621.025 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IRQ_LINE_STATUS, arg: 0x7fff053e10a0) = 0 25500.803 (120.186 ms): CPU 0/KVM/21276 ... [continued]: ioctl()) = 0 25621.078 ( 0.005 ms): CPU 0/KVM/21276 ioctl(fd: 13, cmd: KVM_IRQ_LINE_STATUS, arg: 0x7f484c6c73c0) = 0 25621.346 ( 0.001 ms): CPU 0/KVM/21276 ioctl(fd: 13, cmd: KVM_IRQ_LINE_STATUS, arg: 0x7f484c6c73e0) = 0 40456.997 ( 0.100 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x30, 0x8), arg: 0x7fff053dffe0) = 0 40457.100 ( 0.019 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x30, 0x8), arg: 0x7fff053dffe0) = 0 40457.133 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (READ|WRITE, 0xaf, 0x12, 0x8), arg: 0x7fff053dff60) = 0 40457.139 ( 0.001 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (READ|WRITE, 0xaf, 0x12, 0x8), arg: 0x7fff053dff60) = 0 40458.503 ( 0.027 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IOEVENTFD, arg: 0x7fff053dfc80) = 0 40458.601 ( 0.030 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IOEVENTFD, arg: 0x7fff053dfc80) = 0 40458.649 ( 0.003 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x21, 0x8), arg: 0x7fff053dff20) = 0 40458.654 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x21, 0x8), arg: 0x7fff053dff20) = 0 40458.657 ( 0.018 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IRQFD, arg: 0x7fff053dff00 ) = 0 40459.077 ( 0.017 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IRQFD, arg: 0x7fff053dff00 ) = 0 40459.123 ( 0.017 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IOEVENTFD, arg: 0x7fff053dfd20) = 0 40463.477 ( 0.013 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IOEVENTFD, arg: 0x7fff053dfd20) = 0 40464.874 ( 0.010 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_GET_VCPU_EVENTS, arg: 0x7fff053e0000) = 0 40464.892 ( 0.048 ms): qemu-system-x8/21238 ioctl(fd: 12, cmd: KVM_CHECK_EXTENSION, arg: 0x4c ) = 1 40464.991 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_GET_CLOCK, arg: 0x7fff053e0040) = 0 40464.962 ( 0.013 ms): CPU 0/KVM/21276 ioctl(fd: 20, cmd: KVM_GET_MSRS, arg: 0x7f484c6c7670) = 1 44540.437 ( 0.103 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SET_GSI_ROUTING, arg: 0x563c7c93c000) = 0 44540.544 ( 0.008 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IRQFD, arg: 0x7fff053dfea0 ) = 0 44540.555 ( 0.029 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SET_GSI_ROUTING, arg: 0x563c7c93c000) = 0 44540.586 ( 0.003 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IRQFD, arg: 0x7fff053dfea0 ) = 0 44540.592 ( 0.027 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SET_GSI_ROUTING, arg: 0x563c7c93c000) = 0 44540.625 ( 0.005 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x21, 0x8), arg: 0x7fff053dfe20) = 0 44540.639 ( 0.018 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SET_GSI_ROUTING, arg: 0x563c7c93c000) = 0 44540.658 ( 0.003 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x21, 0x8), arg: 0x7fff053dfe20) = 0 44540.686 ( 0.015 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IOEVENTFD, arg: 0x7fff053dfbe0) = 0 44540.727 ( 0.014 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IOEVENTFD, arg: 0x7fff053dfbe0) = 0 44540.748 ( 0.005 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0, 0x8), arg: 0x7fff053dfe88) = 0 44540.754 ( 0.026 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x3, 0x8), arg: 0x563c7c906870) = 0 44540.783 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x10, 0x8), arg: 0x7fff053dff00) = 0 44540.787 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x12, 0x8), arg: 0x7fff053dff00) = 0 44540.793 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x11, 0x28), arg: 0x7fff053dfe70) = 0 44540.796 ( 0.010 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x20, 0x8), arg: 0x7fff053dfef0) = 0 44540.811 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x10, 0x8), arg: 0x7fff053dff00) = 0 44540.814 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x12, 0x8), arg: 0x7fff053dff00) = 0 44540.819 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x11, 0x28), arg: 0x7fff053dfe70) = 0 44540.822 ( 0.005 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x20, 0x8), arg: 0x7fff053dfef0) = 0 44540.837 ( 0.006 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x30, 0x8), arg: 0x7fff053dff80) = 0 44540.862 ( 0.004 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: (WRITE, 0xaf, 0x30, 0x8), arg: 0x7fff053dff80) = 0 44540.887 ( 0.014 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IOEVENTFD, arg: 0x7fff053dfd00) = 0 44542.756 ( 0.020 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_IOEVENTFD, arg: 0x7fff053dfd00) = 0 44542.809 ( 0.007 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SET_VCPU_EVENTS, arg: 0x7fff053dffb0) = 0 44542.819 ( 0.003 ms): qemu-system-x8/21238 ioctl(fd: 12, cmd: KVM_CHECK_EXTENSION, arg: 0x4c ) = 1 44543.016 ( 0.004 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SET_CLOCK, arg: 0x7fff053dfff0) = 0 44543.022 ( 0.008 ms): qemu-system-x8/21238 ioctl(fd: 20, cmd: KVM_KVMCLOCK_CTRL ) = 0 46952.502 ( 0.010 ms): qemu-system-x8/21238 ioctl(fd: 13, cmd: KVM_SIGNAL_MSI, arg: 0x563c83379d70) = 1 46829.292 (249.860 ms): CPU 0/KVM/21276 ... [continued]: ioctl()) = 0 ^C [root@jouet linux]# Since there are clashes in _IOC_NR() for some cases, notably ioctls with PPC_ and ARM_ in its name and some that depend on some internal state to be valid, but use the same number as others, those were removed in the shell script that builds the table, tools/perf/trace/beauty/kvm_ioctl.sh. Since so far we're supporting only x86 in the 'cmd' ioctl arg beautifier in perf trace, we can leave fully supporting these ioctls for later. There are some more to handle here, notably the one for /dev/vhost-net, will come later. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-zxhebe579n338d7qrnjoctes@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 15 +++++++++++++-- tools/perf/trace/beauty/ioctl.c | 14 +++++++++++++- tools/perf/trace/beauty/kvm_ioctl.sh | 11 +++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100755 tools/perf/trace/beauty/kvm_ioctl.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d49354d340fd..6c1be10c6749 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -406,6 +406,13 @@ sndrv_pcm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh $(sndrv_pcm_ioctl_array): $(sndrv_pcm_hdr_dir)/asound.h $(sndrv_pcm_ioctl_tbl) $(Q)$(SHELL) '$(sndrv_pcm_ioctl_tbl)' $(sndrv_pcm_hdr_dir) > $@ +kvm_ioctl_array := $(beauty_ioctl_outdir)/kvm_ioctl_array.c +kvm_hdr_dir := $(srctree)/tools/include/uapi/linux +kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh + +$(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl) + $(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -500,7 +507,10 @@ endif __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) -prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) $(sndrv_pcm_ioctl_array) $(sndrv_ctl_ioctl_array) +prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \ + $(sndrv_pcm_ioctl_array) \ + $(sndrv_ctl_ioctl_array) \ + $(kvm_ioctl_array) $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -768,7 +778,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)pmu-events/pmu-events.c \ $(OUTPUT)$(drm_ioctl_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ - $(OUTPUT)$(sndrv_pcm_ioctl_array) + $(OUTPUT)$(sndrv_pcm_ioctl_array) \ + $(OUTPUT)$(kvm_ioctl_array) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index ec5a5a499667..89fa4eb68247 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -77,6 +77,17 @@ static size_t ioctl__scnprintf_sndrv_ctl_cmd(int nr, char *bf, size_t size) return scnprintf(bf, size, "(%#x, %#x)", 'U', nr); } +static size_t ioctl__scnprintf_kvm_cmd(int nr, char *bf, size_t size) +{ +#include "trace/beauty/generated/ioctl/kvm_ioctl_array.c" + static DEFINE_STRARRAY(kvm_ioctl_cmds); + + if (nr < strarray__kvm_ioctl_cmds.nr_entries && strarray__kvm_ioctl_cmds.entries[nr] != NULL) + return scnprintf(bf, size, "KVM_%s", strarray__kvm_ioctl_cmds.entries[nr]); + + return scnprintf(bf, size, "(%#x, %#x)", 0xAE, nr); +} + static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) { int dir = _IOC_DIR(cmd), @@ -91,7 +102,8 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) { .type = 'A', .scnprintf = ioctl__scnprintf_sndrv_pcm_cmd, }, ['T' - 'A']= { .type = 'T', .scnprintf = ioctl__scnprintf_tty_cmd, }, ['U' - 'A']= { .type = 'U', .scnprintf = ioctl__scnprintf_sndrv_ctl_cmd, }, - ['d' - 'A'] = { .type = 'd', .scnprintf = ioctl__scnprintf_drm_cmd, } + ['d' - 'A'] = { .type = 'd', .scnprintf = ioctl__scnprintf_drm_cmd, }, + [0xAE - 'A'] = { .type = 0xAE, .scnprintf = ioctl__scnprintf_kvm_cmd, }, }; const int nr_types = ARRAY_SIZE(ioctl_types); diff --git a/tools/perf/trace/beauty/kvm_ioctl.sh b/tools/perf/trace/beauty/kvm_ioctl.sh new file mode 100755 index 000000000000..bd28817afced --- /dev/null +++ b/tools/perf/trace/beauty/kvm_ioctl.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +kvm_header_dir=$1 + +printf "static const char *kvm_ioctl_cmds[] = {\n" +regex='^#[[:space:]]*define[[:space:]]+KVM_(\w+)[[:space:]]+_IO[RW]*\([[:space:]]*KVMIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*' +egrep $regex ${kvm_header_dir}/kvm.h | \ + sed -r "s/$regex/\2 \1/g" | \ + egrep -v " ((ARM|PPC|S390)_|[GS]ET_(DEBUGREGS|PIT2|XSAVE|TSC_KHZ)|CREATE_SPAPR_TCE_64)" | \ + sort | xargs printf "\t[%s] = \"%s\",\n" +printf "};\n" From 8ff69577075e0321ef25d3afcb293db39a31342a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Aug 2017 11:46:36 -0300 Subject: [PATCH 156/253] perf trace beauty ioctl: Pass _IOC_DIR to the per _IOC_TYPE scnprintf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all subsystems use the fact that we may have the same _IOC_NR for different _IOC_DIR, as in the end it'll result in a different ioctl number. So, for instance, vhost virtio has: #define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) #define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) So same _IOC_NR (0x00) but different _IOC_DIR (R versus W), but it also have: #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) A "get" operation that uses a "W" _IOC_DIR, and its implementation, uses copy_to_user, it should've probably been _IOR(). Then: /* Base value where queue looks for available descriptors */ #define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) /* Get accessor: reads index, writes value in num */ #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) So we'll need to use _IOC_DIR() to disambiguate the VHOST_VIRTIO ioctl bautifier. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-rq6q717ql7j2z7kuccafgq84@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/ioctl.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 89fa4eb68247..8633d96ed131 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -19,7 +19,7 @@ */ #include -static size_t ioctl__scnprintf_tty_cmd(int nr, char *bf, size_t size) +static size_t ioctl__scnprintf_tty_cmd(int nr, int dir, char *bf, size_t size) { static const char *ioctl_tty_cmd[] = { "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW", @@ -41,10 +41,10 @@ static size_t ioctl__scnprintf_tty_cmd(int nr, char *bf, size_t size) if (nr < strarray__ioctl_tty_cmd.nr_entries && strarray__ioctl_tty_cmd.entries[nr] != NULL) return scnprintf(bf, size, "%s", strarray__ioctl_tty_cmd.entries[nr]); - return scnprintf(bf, size, "(%#x, %#x)", 'T', nr); + return scnprintf(bf, size, "(%#x, %#x, %#x)", 'T', nr, dir); } -static size_t ioctl__scnprintf_drm_cmd(int nr, char *bf, size_t size) +static size_t ioctl__scnprintf_drm_cmd(int nr, int dir, char *bf, size_t size) { #include "trace/beauty/generated/ioctl/drm_ioctl_array.c" static DEFINE_STRARRAY(drm_ioctl_cmds); @@ -52,10 +52,10 @@ static size_t ioctl__scnprintf_drm_cmd(int nr, char *bf, size_t size) if (nr < strarray__drm_ioctl_cmds.nr_entries && strarray__drm_ioctl_cmds.entries[nr] != NULL) return scnprintf(bf, size, "DRM_%s", strarray__drm_ioctl_cmds.entries[nr]); - return scnprintf(bf, size, "(%#x, %#x)", 'd', nr); + return scnprintf(bf, size, "(%#x, %#x, %#x)", 'd', nr, dir); } -static size_t ioctl__scnprintf_sndrv_pcm_cmd(int nr, char *bf, size_t size) +static size_t ioctl__scnprintf_sndrv_pcm_cmd(int nr, int dir, char *bf, size_t size) { #include "trace/beauty/generated/ioctl/sndrv_pcm_ioctl_array.c" static DEFINE_STRARRAY(sndrv_pcm_ioctl_cmds); @@ -63,10 +63,10 @@ static size_t ioctl__scnprintf_sndrv_pcm_cmd(int nr, char *bf, size_t size) if (nr < strarray__sndrv_pcm_ioctl_cmds.nr_entries && strarray__sndrv_pcm_ioctl_cmds.entries[nr] != NULL) return scnprintf(bf, size, "SNDRV_PCM_%s", strarray__sndrv_pcm_ioctl_cmds.entries[nr]); - return scnprintf(bf, size, "(%#x, %#x)", 'A', nr); + return scnprintf(bf, size, "(%#x, %#x, %#x)", 'A', nr, dir); } -static size_t ioctl__scnprintf_sndrv_ctl_cmd(int nr, char *bf, size_t size) +static size_t ioctl__scnprintf_sndrv_ctl_cmd(int nr, int dir, char *bf, size_t size) { #include "trace/beauty/generated/ioctl/sndrv_ctl_ioctl_array.c" static DEFINE_STRARRAY(sndrv_ctl_ioctl_cmds); @@ -74,10 +74,10 @@ static size_t ioctl__scnprintf_sndrv_ctl_cmd(int nr, char *bf, size_t size) if (nr < strarray__sndrv_ctl_ioctl_cmds.nr_entries && strarray__sndrv_ctl_ioctl_cmds.entries[nr] != NULL) return scnprintf(bf, size, "SNDRV_CTL_%s", strarray__sndrv_ctl_ioctl_cmds.entries[nr]); - return scnprintf(bf, size, "(%#x, %#x)", 'U', nr); + return scnprintf(bf, size, "(%#x, %#x, %#x)", 'U', nr, dir); } -static size_t ioctl__scnprintf_kvm_cmd(int nr, char *bf, size_t size) +static size_t ioctl__scnprintf_kvm_cmd(int nr, int dir, char *bf, size_t size) { #include "trace/beauty/generated/ioctl/kvm_ioctl_array.c" static DEFINE_STRARRAY(kvm_ioctl_cmds); @@ -85,7 +85,7 @@ static size_t ioctl__scnprintf_kvm_cmd(int nr, char *bf, size_t size) if (nr < strarray__kvm_ioctl_cmds.nr_entries && strarray__kvm_ioctl_cmds.entries[nr] != NULL) return scnprintf(bf, size, "KVM_%s", strarray__kvm_ioctl_cmds.entries[nr]); - return scnprintf(bf, size, "(%#x, %#x)", 0xAE, nr); + return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAE, nr, dir); } static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) @@ -97,7 +97,7 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) int printed = 0; static const struct ioctl_type { int type; - size_t (*scnprintf)(int nr, char *bf, size_t size); + size_t (*scnprintf)(int nr, int dir, char *bf, size_t size); } ioctl_types[] = { /* Must be ordered by type */ { .type = 'A', .scnprintf = ioctl__scnprintf_sndrv_pcm_cmd, }, ['T' - 'A']= { .type = 'T', .scnprintf = ioctl__scnprintf_tty_cmd, }, @@ -111,7 +111,7 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) const int index = type - ioctl_types[0].type; if (ioctl_types[index].scnprintf != NULL) - return ioctl_types[index].scnprintf(nr, bf, size); + return ioctl_types[index].scnprintf(nr, dir, bf, size); } printed += scnprintf(bf + printed, size - printed, "%c", '('); From d02b395e118d7d4100caeeab553cebd182fdefd5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 16:45:32 -0300 Subject: [PATCH 157/253] tools include uapi: Grab a copy of linux/vhost.h We will use it to generate tables for beautifying ioctl's 'cmd' arg. Cc: Adrian Hunter Cc: David Ahern Cc: Jason Wang Cc: Jiri Olsa Cc: "Michael S. Tsirkin" Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-nxwpq34hu6te1m2ra5m7o8n9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/vhost.h | 209 +++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 1 + tools/perf/check-headers.sh | 1 + 3 files changed, 211 insertions(+) create mode 100644 tools/include/uapi/linux/vhost.h diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h new file mode 100644 index 000000000000..60180c0b5dc6 --- /dev/null +++ b/tools/include/uapi/linux/vhost.h @@ -0,0 +1,209 @@ +#ifndef _LINUX_VHOST_H +#define _LINUX_VHOST_H +/* Userspace interface for in-kernel virtio accelerators. */ + +/* vhost is used to reduce the number of system calls involved in virtio. + * + * Existing virtio net code is used in the guest without modification. + * + * This header includes interface used by userspace hypervisor for + * device configuration. + */ + +#include +#include +#include +#include +#include + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; /* Pass -1 to unbind from file. */ + +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + __u64 desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + __u64 used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + __u64 avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. */ + __u64 log_guest_addr; +}; + +/* no alignment requirement */ +struct vhost_iotlb_msg { + __u64 iova; + __u64 size; + __u64 uaddr; +#define VHOST_ACCESS_RO 0x1 +#define VHOST_ACCESS_WO 0x2 +#define VHOST_ACCESS_RW 0x3 + __u8 perm; +#define VHOST_IOTLB_MISS 1 +#define VHOST_IOTLB_UPDATE 2 +#define VHOST_IOTLB_INVALIDATE 3 +#define VHOST_IOTLB_ACCESS_FAIL 4 + __u8 type; +}; + +#define VHOST_IOTLB_MSG 0x1 + +struct vhost_msg { + int type; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + +struct vhost_memory_region { + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; + __u64 flags_padding; /* No flags are currently specified. */ +}; + +/* All region addresses and sizes must be 4K aligned. */ +#define VHOST_PAGE_SIZE 0x1000 + +struct vhost_memory { + __u32 nregions; + __u32 padding; + struct vhost_memory_region regions[0]; +}; + +/* ioctls */ + +#define VHOST_VIRTIO 0xAF + +/* Features bitmask for forward compatibility. Transport bits are used for + * vhost specific features. */ +#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) + +/* Set current process as the (exclusive) owner of this file descriptor. This + * must be called before any other vhost command. Further calls to + * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ +#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) +/* Give up ownership, and reset the device to default values. + * Allows subsequent call to VHOST_OWNER_SET to succeed. */ +#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) + +/* Set up/modify memory layout */ +#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory) + +/* Write logging setup. */ +/* Memory writes can optionally be logged by setting bit at an offset + * (calculated from the physical address) from specified log base. + * The bit is set using an atomic 32 bit operation. */ +/* Set base address for logging. */ +#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) +/* Specify an eventfd file descriptor to signal on log write. */ +#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) + +/* Ring setup. */ +/* Set number of descriptors in ring. This parameter can not + * be modified while ring is running (bound to a device). */ +#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) +/* Set addresses for the ring. */ +#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) +/* Base value where queue looks for available descriptors */ +#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +/* Get accessor: reads index, writes value in num */ +#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) + +/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN + * or VHOST_VRING_BIG_ENDIAN (other values return -EINVAL). + * The byte order cannot be changed while the device is active: trying to do so + * returns -EBUSY. + * This is a legacy only API that is simply ignored when VIRTIO_F_VERSION_1 is + * set. + * Not all kernel configurations support this ioctl, but all configurations that + * support SET also support GET. + */ +#define VHOST_VRING_LITTLE_ENDIAN 0 +#define VHOST_VRING_BIG_ENDIAN 1 +#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) +#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) + +/* The following ioctls use eventfd file descriptors to signal and poll + * for events. */ + +/* Set eventfd to poll for added buffers */ +#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) +/* Set eventfd to signal when buffers have beed used */ +#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) +/* Set eventfd to signal an error */ +#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) +/* Set busy loop timeout (in us) */ +#define VHOST_SET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x23, \ + struct vhost_vring_state) +/* Get busy loop timeout (in us) */ +#define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \ + struct vhost_vring_state) + +/* VHOST_NET specific defines */ + +/* Attach virtio net ring to a raw socket, or tap device. + * The socket must be already bound to an ethernet device, this device will be + * used for transmit. Pass fd -1 to unbind from the socket and the transmit + * device. This can be used to stop the ring (e.g. for migration). */ +#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) + +/* Feature bits */ +/* Log all write descriptors. Can be changed while device is active. */ +#define VHOST_F_LOG_ALL 26 +/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ +#define VHOST_NET_F_VIRTIO_NET_HDR 27 + +/* VHOST_SCSI specific definitions */ + +/* + * Used by QEMU userspace to ensure a consistent vhost-scsi ABI. + * + * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate + + * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage + * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target. + * All the targets under vhost_wwpn can be seen and used by guset. + */ + +#define VHOST_SCSI_ABI_VERSION 1 + +struct vhost_scsi_target { + int abi_version; + char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */ + unsigned short vhost_tpgt; + unsigned short reserved; +}; + +#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target) +#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target) +/* Changing this breaks userspace. */ +#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int) +/* Set and get the events missed flag */ +#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) +#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) + +/* VHOST_VSOCK specific defines */ + +#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) +#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) + +#endif diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 513f52a484fd..62072822dc85 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -85,6 +85,7 @@ tools/include/uapi/linux/mman.h tools/include/uapi/linux/perf_event.h tools/include/uapi/linux/sched.h tools/include/uapi/linux/stat.h +tools/include/uapi/linux/vhost.h tools/include/uapi/sound/asound.h tools/include/linux/poison.h tools/include/linux/rbtree.h diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 1f009506ff63..932fda54b8a6 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -8,6 +8,7 @@ include/uapi/linux/kvm.h include/uapi/linux/perf_event.h include/uapi/linux/sched.h include/uapi/linux/stat.h +include/uapi/linux/vhost.h include/uapi/sound/asound.h include/linux/hash.h include/uapi/linux/hw_breakpoint.h From ec6dd85f6e39bf516f4420d62270380b96bbee57 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 17:34:47 -0300 Subject: [PATCH 158/253] perf trace beautify ioctl: Beautify vhost virtio ioctl's 'cmd' arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also trying a new approach, using a copy of uapi/linux/vhost.h we auto generate the string tables, then include it in the ioctl cmd beautifier. This way either the KVM developers will add the new commands to the tools/ copy, like is happening with other areas of tools/include/ (bpf.h comes to mind), or we'll be notified when building perf that our copy drifted. E.g., doing syswide tracing grepping for the newly beautified VHOST ioctls: # perf trace -e ioctl 2>&1 | grep VHOST 3873.064 ( 0.099 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_NET_SET_BACKEND, arg: 0x7fff053dffe0) = 0 3873.168 ( 0.019 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_NET_SET_BACKEND, arg: 0x7fff053dffe0) = 0 3873.226 ( 0.006 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_GET_VRING_BASE, arg: 0x7fff053dff60) = 0 3873.244 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_GET_VRING_BASE, arg: 0x7fff053dff60) = 0 3873.817 ( 0.014 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_CALL, arg: 0x7fff053dff20) = 0 3873.838 ( 0.004 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_CALL, arg: 0x7fff053dff20) = 0 4701.372 ( 0.006 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_CALL, arg: 0x7fff053dfe20) = 0 4701.417 ( 0.007 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_CALL, arg: 0x7fff053dfe20) = 0 4701.563 ( 0.004 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_FEATURES, arg: 0x7fff053dfe88) = 0 4701.571 ( 0.028 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_MEM_TABLE, arg: 0x563c7c906870) = 0 4701.604 ( 0.003 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_NUM, arg: 0x7fff053dff00) = 0 4701.609 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_BASE, arg: 0x7fff053dff00) = 0 4701.615 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_ADDR, arg: 0x7fff053dfe70) = 0 4701.619 ( 0.008 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_KICK, arg: 0x7fff053dfef0) = 0 4701.634 ( 0.004 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_NUM, arg: 0x7fff053dff00) = 0 4701.640 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_BASE, arg: 0x7fff053dff00) = 0 4701.644 ( 0.002 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_ADDR, arg: 0x7fff053dfe70) = 0 4701.648 ( 0.009 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_SET_VRING_KICK, arg: 0x7fff053dfef0) = 0 4701.665 ( 0.005 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_NET_SET_BACKEND, arg: 0x7fff053dff80) = 0 4701.672 ( 0.004 ms): qemu-system-x8/21238 ioctl(fd: 27, cmd: VHOST_NET_SET_BACKEND, arg: 0x7fff053dff80) = 0 ^C '-e ioctl' uses tracepoint filters, in time this will be replaces by eBPF filters hooked at the syscall tracepoints and that "grep VHOST" will also be done with eBPF, right at the kernel, to reduce overhead. Cc: Adrian Hunter Cc: David Ahern Cc: Jason Wang Cc: Jiri Olsa Cc: "Michael S. Tsirkin" Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2gthnhpliunvakywjterrzz3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 13 +++++++++++-- tools/perf/trace/beauty/ioctl.c | 14 ++++++++++++++ tools/perf/trace/beauty/vhost_virtio_ioctl.sh | 17 +++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100755 tools/perf/trace/beauty/vhost_virtio_ioctl.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6c1be10c6749..606358d67f8a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -413,6 +413,13 @@ kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh $(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl) $(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@ +vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c +vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux +vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh + +$(vhost_virtio_ioctl_array): $(vhost_virtio_hdr_dir)/vhost.h $(vhost_virtio_ioctl_tbl) + $(Q)$(SHELL) '$(vhost_virtio_ioctl_tbl)' $(vhost_virtio_hdr_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -510,7 +517,8 @@ build-dir = $(if $(__build-dir),$(__build-dir),.) prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \ $(sndrv_pcm_ioctl_array) \ $(sndrv_ctl_ioctl_array) \ - $(kvm_ioctl_array) + $(kvm_ioctl_array) \ + $(vhost_virtio_ioctl_array) $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -779,7 +787,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(drm_ioctl_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ $(OUTPUT)$(sndrv_pcm_ioctl_array) \ - $(OUTPUT)$(kvm_ioctl_array) + $(OUTPUT)$(kvm_ioctl_array) \ + $(OUTPUT)$(vhost_virtio_ioctl_array) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 8633d96ed131..95434b27873d 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -88,6 +88,19 @@ static size_t ioctl__scnprintf_kvm_cmd(int nr, int dir, char *bf, size_t size) return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAE, nr, dir); } +static size_t ioctl__scnprintf_vhost_virtio_cmd(int nr, int dir, char *bf, size_t size) +{ +#include "trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c" + static DEFINE_STRARRAY(vhost_virtio_ioctl_cmds); + static DEFINE_STRARRAY(vhost_virtio_ioctl_read_cmds); + struct strarray *s = (dir & _IOC_READ) ? &strarray__vhost_virtio_ioctl_read_cmds : &strarray__vhost_virtio_ioctl_cmds; + + if (nr < s->nr_entries && s->entries[nr] != NULL) + return scnprintf(bf, size, "VHOST_%s", s->entries[nr]); + + return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAF, nr, dir); +} + static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) { int dir = _IOC_DIR(cmd), @@ -104,6 +117,7 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) ['U' - 'A']= { .type = 'U', .scnprintf = ioctl__scnprintf_sndrv_ctl_cmd, }, ['d' - 'A'] = { .type = 'd', .scnprintf = ioctl__scnprintf_drm_cmd, }, [0xAE - 'A'] = { .type = 0xAE, .scnprintf = ioctl__scnprintf_kvm_cmd, }, + [0xAF - 'A'] = { .type = 0xAF, .scnprintf = ioctl__scnprintf_vhost_virtio_cmd, }, }; const int nr_types = ARRAY_SIZE(ioctl_types); diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh new file mode 100755 index 000000000000..76f1de697787 --- /dev/null +++ b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +vhost_virtio_header_dir=$1 + +printf "static const char *vhost_virtio_ioctl_cmds[] = {\n" +regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*' +egrep $regex ${vhost_virtio_header_dir}/vhost.h | \ + sed -r "s/$regex/\2 \1/g" | \ + sort | xargs printf "\t[%s] = \"%s\",\n" +printf "};\n" + +printf "static const char *vhost_virtio_ioctl_read_cmds[] = {\n" +regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?R\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*' +egrep $regex ${vhost_virtio_header_dir}/vhost.h | \ + sed -r "s/$regex/\2 \1/g" | \ + sort | xargs printf "\t[%s] = \"%s\",\n" +printf "};\n" From 81e3d8b2af2e7417f1d5164aab5c1a75955e8a5d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jul 2017 17:34:47 -0300 Subject: [PATCH 159/253] perf trace beautify ioctl: Beautify perf ioctl's 'cmd' arg Also trying a new approach, using the copy of uapi/linux/perf_event.h we auto generate the string tables, then include it in the ioctl cmd beautifier. This way either the perf developers will add the new commands to the tools/ copy, like is happening with other areas of tools/include/ (bpf.h comes to mind), or we'll be notified when building perf that our copy drifted. E.g., looking at some of the perf ioctls issued by the 'perf test' test cases: # (perf trace -e perf_event_open,ioctl perf test) 2>&1 | egrep "(cmd: PERF_|perf_event_open)" 4: Read samples using the mmap interface : 348.811 ( 0.062 ms): perf/23351 perf_event_open(attr_uptr: 0x414a5e8, pid: 23351 (perf), group_fd: -1, flags: FD_CLOEXEC) = 3 348.878 ( 0.039 ms): perf/23351 perf_event_open(attr_uptr: 0x414a5e8, pid: 23351 (perf), cpu: 1, group_fd: -1, flags: FD_CLOEXEC) = 4 348.919 ( 0.036 ms): perf/23351 perf_event_open(attr_uptr: 0x414a5e8, pid: 23351 (perf), cpu: 2, group_fd: -1, flags: FD_CLOEXEC) = 5 348.958 ( 0.036 ms): perf/23351 perf_event_open(attr_uptr: 0x414a5e8, pid: 23351 (perf), cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 6 349.070 ( 0.046 ms): perf/23351 perf_event_open(attr_uptr: 0x414aa38, pid: 23351 (perf), group_fd: -1, flags: FD_CLOEXEC) = 7 349.120 ( 0.037 ms): perf/23351 perf_event_open(attr_uptr: 0x414aa38, pid: 23351 (perf), cpu: 1, group_fd: -1, flags: FD_CLOEXEC) = 8 349.161 ( 0.036 ms): perf/23351 perf_event_open(attr_uptr: 0x414aa38, pid: 23351 (perf), cpu: 2, group_fd: -1, flags: FD_CLOEXEC) = 9 349.201 ( 0.035 ms): perf/23351 perf_event_open(attr_uptr: 0x414aa38, pid: 23351 (perf), cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 10 349.306 ( 0.041 ms): perf/23351 perf_event_open(attr_uptr: 0x414b2d8, pid: 23351 (perf), group_fd: -1, flags: FD_CLOEXEC) = 11 349.611 ( 0.005 ms): perf/23351 ioctl(fd: 3, cmd: PERF_ID, arg: 0x7fff025999b8) = 0 349.619 ( 0.002 ms): perf/23351 ioctl(fd: 7, cmd: PERF_SET_OUTPUT, arg: 0x3 ) = 0 349.623 ( 0.002 ms): perf/23351 ioctl(fd: 7, cmd: PERF_ID, arg: 0x7fff025999b8) = 0 349.627 ( 0.002 ms): perf/23351 ioctl(fd: 11, cmd: PERF_SET_OUTPUT, arg: 0x3 ) = 0 349.630 ( 0.001 ms): perf/23351 ioctl(fd: 11, cmd: PERF_ID, arg: 0x7fff025999b8) = 0 7: PERF_RECORD_* events & perf_sample fields : 647.150 ( 0.014 ms): perf/23354 perf_event_open(attr_uptr: 0x7fff02599920, pid: -1, cpu: 2, group_fd: -1, flags: FD_CLOEXEC) = 3 647.197 ( 0.076 ms): perf/23354 perf_event_open(attr_uptr: 0x414b478, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 3 647.289 ( 0.040 ms): perf/23354 perf_event_open(attr_uptr: 0x414b478, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 3 647.368 ( 0.011 ms): perf/23354 perf_event_open(attr_uptr: 0x414a5e8, pid: 23355 (perf), group_fd: -1, flags: FD_CLOEXEC) = 3 647.381 ( 0.005 ms): perf/23354 perf_event_open(attr_uptr: 0x414a5e8, pid: 23355 (perf), cpu: 1, group_fd: -1, flags: FD_CLOEXEC) = 4 647.387 ( 0.005 ms): perf/23354 perf_event_open(attr_uptr: 0x414a5e8, pid: 23355 (perf), cpu: 2, group_fd: -1, flags: FD_CLOEXEC) = 5 647.393 ( 0.004 ms): perf/23354 perf_event_open(attr_uptr: 0x414a5e8, pid: 23355 (perf), cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 7 648.026 ( 0.011 ms): perf/23354 ioctl(fd: 3, cmd: PERF_ENABLE) = 0 648.038 ( 0.002 ms): perf/23354 ioctl(fd: 4, cmd: PERF_ENABLE) = 0 648.042 ( 0.002 ms): perf/23354 ioctl(fd: 5, cmd: PERF_ENABLE) = 0 648.045 ( 0.002 ms): perf/23354 ioctl(fd: 7, cmd: PERF_ENABLE) = 0 18: Breakpoint overflow signal handler : 2772.721 ( 0.017 ms): perf/23375 perf_event_open(attr_uptr: 0x7fff02599d20, pid: -1, cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 3 2772.748 ( 0.009 ms): perf/23375 perf_event_open(attr_uptr: 0x7fff02599e60, cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3 2772.768 ( 0.002 ms): perf/23375 ioctl(fd: 3, cmd: PERF_RESET) = 0 2772.776 ( 0.008 ms): perf/23375 perf_event_open(attr_uptr: 0x7fff02599e60, cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4 2772.788 ( 0.002 ms): perf/23375 ioctl(fd: 4, cmd: PERF_RESET) = 0 2772.791 ( 0.006 ms): perf/23375 perf_event_open(attr_uptr: 0x7fff02599e60, cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5 2772.800 ( 0.001 ms): perf/23375 ioctl(fd: 5, cmd: PERF_RESET) = 0 2772.803 ( 0.005 ms): perf/23375 ioctl(fd: 3, cmd: PERF_ENABLE) = 0 2772.810 ( 0.004 ms): perf/23375 ioctl(fd: 4, cmd: PERF_ENABLE) = 0 2772.815 ( 0.004 ms): perf/23375 ioctl(fd: 5, cmd: PERF_ENABLE) = 0 Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ahotwscqt080ae0ulu3zznh2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 13 +++++++++++-- tools/perf/trace/beauty/ioctl.c | 24 ++++++++++++++++++------ tools/perf/trace/beauty/perf_ioctl.sh | 10 ++++++++++ 3 files changed, 39 insertions(+), 8 deletions(-) create mode 100755 tools/perf/trace/beauty/perf_ioctl.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 606358d67f8a..c1f7884979e2 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -420,6 +420,13 @@ vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl. $(vhost_virtio_ioctl_array): $(vhost_virtio_hdr_dir)/vhost.h $(vhost_virtio_ioctl_tbl) $(Q)$(SHELL) '$(vhost_virtio_ioctl_tbl)' $(vhost_virtio_hdr_dir) > $@ +perf_ioctl_array := $(beauty_ioctl_outdir)/perf_ioctl_array.c +perf_hdr_dir := $(srctree)/tools/include/uapi/linux +perf_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/perf_ioctl.sh + +$(perf_ioctl_array): $(perf_hdr_dir)/perf_event.h $(perf_ioctl_tbl) + $(Q)$(SHELL) '$(perf_ioctl_tbl)' $(perf_hdr_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -518,7 +525,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(sndrv_pcm_ioctl_array) \ $(sndrv_ctl_ioctl_array) \ $(kvm_ioctl_array) \ - $(vhost_virtio_ioctl_array) + $(vhost_virtio_ioctl_array) \ + $(perf_ioctl_array) $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -788,7 +796,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(sndrv_ctl_ioctl_array) \ $(OUTPUT)$(sndrv_pcm_ioctl_array) \ $(OUTPUT)$(kvm_ioctl_array) \ - $(OUTPUT)$(vhost_virtio_ioctl_array) + $(OUTPUT)$(vhost_virtio_ioctl_array) \ + $(OUTPUT)$(perf_ioctl_array) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 95434b27873d..1be3b4cf0827 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -101,6 +101,17 @@ static size_t ioctl__scnprintf_vhost_virtio_cmd(int nr, int dir, char *bf, size_ return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAF, nr, dir); } +static size_t ioctl__scnprintf_perf_cmd(int nr, int dir, char *bf, size_t size) +{ +#include "trace/beauty/generated/ioctl/perf_ioctl_array.c" + static DEFINE_STRARRAY(perf_ioctl_cmds); + + if (nr < strarray__perf_ioctl_cmds.nr_entries && strarray__perf_ioctl_cmds.entries[nr] != NULL) + return scnprintf(bf, size, "PERF_%s", strarray__perf_ioctl_cmds.entries[nr]); + + return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAE, nr, dir); +} + static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) { int dir = _IOC_DIR(cmd), @@ -112,12 +123,13 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size) int type; size_t (*scnprintf)(int nr, int dir, char *bf, size_t size); } ioctl_types[] = { /* Must be ordered by type */ - { .type = 'A', .scnprintf = ioctl__scnprintf_sndrv_pcm_cmd, }, - ['T' - 'A']= { .type = 'T', .scnprintf = ioctl__scnprintf_tty_cmd, }, - ['U' - 'A']= { .type = 'U', .scnprintf = ioctl__scnprintf_sndrv_ctl_cmd, }, - ['d' - 'A'] = { .type = 'd', .scnprintf = ioctl__scnprintf_drm_cmd, }, - [0xAE - 'A'] = { .type = 0xAE, .scnprintf = ioctl__scnprintf_kvm_cmd, }, - [0xAF - 'A'] = { .type = 0xAF, .scnprintf = ioctl__scnprintf_vhost_virtio_cmd, }, + { .type = '$', .scnprintf = ioctl__scnprintf_perf_cmd, }, + ['A' - '$'] = { .type = 'A', .scnprintf = ioctl__scnprintf_sndrv_pcm_cmd, }, + ['T' - '$'] = { .type = 'T', .scnprintf = ioctl__scnprintf_tty_cmd, }, + ['U' - '$'] = { .type = 'U', .scnprintf = ioctl__scnprintf_sndrv_ctl_cmd, }, + ['d' - '$'] = { .type = 'd', .scnprintf = ioctl__scnprintf_drm_cmd, }, + [0xAE - '$'] = { .type = 0xAE, .scnprintf = ioctl__scnprintf_kvm_cmd, }, + [0xAF - '$'] = { .type = 0xAF, .scnprintf = ioctl__scnprintf_vhost_virtio_cmd, }, }; const int nr_types = ARRAY_SIZE(ioctl_types); diff --git a/tools/perf/trace/beauty/perf_ioctl.sh b/tools/perf/trace/beauty/perf_ioctl.sh new file mode 100755 index 000000000000..faea4237c793 --- /dev/null +++ b/tools/perf/trace/beauty/perf_ioctl.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +header_dir=$1 + +printf "static const char *perf_ioctl_cmds[] = {\n" +regex='^#[[:space:]]*define[[:space:]]+PERF_EVENT_IOC_(\w+)[[:space:]]+_IO[RW]*[[:space:]]*\([[:space:]]*.\$.[[:space:]]*,[[:space:]]*([[:digit:]]+).*' +egrep $regex ${header_dir}/perf_event.h | \ + sed -r "s/$regex/\2 \1/g" | \ + sort | xargs printf "\t[%s] = \"%s\",\n" +printf "};\n" From 910448bbed066ab1082b510eef1ae61bb792d854 Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Wed, 14 Jun 2017 11:26:57 -0500 Subject: [PATCH 160/253] perf/x86/amd/uncore: Rename cpufeatures macro for cache counters In Family 17h, L3 is the last level cache as opposed to L2 in previous families. Avoid this name confusion and rename X86_FEATURE_PERFCTR_L2 to X86_FEATURE_PERFCTR_LLC to indicate the performance counter on the last level of cache. Signed-off-by: Janakarajan Natarajan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/016311029fdecdc3fdc13b7ed865c6cbf48b2f15.1497452002.git.Janakarajan.Natarajan@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/uncore.c | 2 +- arch/x86/include/asm/cpufeatures.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index ad44af0dd667..e34f8a6b4440 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -555,7 +555,7 @@ static int __init amd_uncore_init(void) ret = 0; } - if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) { + if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { amd_uncore_llc = alloc_percpu(struct amd_uncore *); if (!amd_uncore_llc) { ret = -ENOMEM; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ca3c48c0872f..73a712736e2c 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -177,7 +177,7 @@ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ #define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ #define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ #define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ /* From ab027620e95987b5f0145013090a109b4152d23b Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Wed, 14 Jun 2017 11:26:58 -0500 Subject: [PATCH 161/253] perf/x86/amd/uncore: Get correct number of cores sharing last level cache In Family 17h, the number of cores sharing a cache level is obtained from the Cache Properties CPUID leaf (0x8000001d) by passing in the cache level in ECX. In prior families, a cache level of 2 was used to determine this information. To get the right information, irrespective of Family, iterate over the cache levels using CPUID 0x8000001d. The last level cache is the last value to return a non-zero value in EAX. Signed-off-by: Janakarajan Natarajan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5ab569025b39cdfaeca55b571d78c0fc800bdb69.1497452002.git.Janakarajan.Natarajan@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/uncore.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index e34f8a6b4440..f5cbbba99283 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -400,11 +400,24 @@ static int amd_uncore_cpu_starting(unsigned int cpu) if (amd_uncore_llc) { unsigned int apicid = cpu_data(cpu).apicid; - unsigned int nshared; + unsigned int nshared, subleaf, prev_eax = 0; uncore = *per_cpu_ptr(amd_uncore_llc, cpu); - cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx); - nshared = ((eax >> 14) & 0xfff) + 1; + /* + * Iterate over Cache Topology Definition leaves until no + * more cache descriptions are available. + */ + for (subleaf = 0; subleaf < 5; subleaf++) { + cpuid_count(0x8000001d, subleaf, &eax, &ebx, &ecx, &edx); + + /* EAX[0:4] gives type of cache */ + if (!(eax & 0x1f)) + break; + + prev_eax = eax; + } + nshared = ((prev_eax >> 14) & 0xfff) + 1; + uncore->id = apicid - (apicid % nshared); uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); From fdccc3fb7a42ea4e4cd77d2fb8fa3a45c66ec0bf Mon Sep 17 00:00:00 2001 From: "leilei.lin" Date: Wed, 9 Aug 2017 08:29:21 +0800 Subject: [PATCH 162/253] perf/core: Reduce context switch overhead Skip most of the PMU context switching overhead when ctx->nr_events is 0. 50% performance overhead was observed under an extreme testcase. Signed-off-by: leilei.lin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: eranian@gmail.com Cc: jolsa@redhat.com Cc: linxiulei@gmail.com Cc: yang_oliver@hotmail.com Link: http://lkml.kernel.org/r/20170809002921.69813-1-leilei.lin@alibaba-inc.com [ Rewrote the changelog. ] Signed-off-by: Ingo Molnar --- kernel/events/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index ee20d4c546b5..d704e23914bf 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3211,6 +3211,13 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, return; perf_ctx_lock(cpuctx, ctx); + /* + * We must check ctx->nr_events while holding ctx->lock, such + * that we serialize against perf_install_in_context(). + */ + if (!ctx->nr_events) + goto unlock; + perf_pmu_disable(ctx->pmu); /* * We want to keep the following priority order: @@ -3224,6 +3231,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); perf_event_sched_in(cpuctx, ctx, task); perf_pmu_enable(ctx->pmu); + +unlock: perf_ctx_unlock(cpuctx, ctx); } From b4464bf977004832f63f31c015751c049bc47dde Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Aug 2017 11:35:04 +0900 Subject: [PATCH 163/253] h8300: Mark _stext and _etext as char-arrays, not single char variables Mark _stext and _etext as character arrays instead of single character variables, like include/asm-generic/sections.h does. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Chris Zankel Cc: David S . Miller Cc: Francis Deslauriers Cc: Jesper Nilsson Cc: Linus Torvalds Cc: Max Filippov Cc: Mikael Starvik Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-cris-kernel@axis.com Cc: mathieu.desnoyers@efficios.com Link: http://lkml.kernel.org/r/150172769415.27216.12021110228384155707.stgit@devbox Signed-off-by: Ingo Molnar --- arch/h8300/include/asm/traps.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/h8300/include/asm/traps.h b/arch/h8300/include/asm/traps.h index 15e701130b27..1c5a30ec2df8 100644 --- a/arch/h8300/include/asm/traps.h +++ b/arch/h8300/include/asm/traps.h @@ -33,9 +33,9 @@ extern unsigned long *_interrupt_redirect_table; #define TRAP2_VEC 10 #define TRAP3_VEC 11 -extern char _start, _etext; +extern char _start[], _etext[]; #define check_kernel_text(addr) \ - ((addr >= (unsigned long)(&_start)) && \ - (addr < (unsigned long)(&_etext)) && !(addr & 1)) + ((addr >= (unsigned long)(_start)) && \ + (addr < (unsigned long)(_etext)) && !(addr & 1)) #endif /* _H8300_TRAPS_H */ From 1824436262b2f43a46051a4958e2dd58a9d9aadf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Aug 2017 11:36:09 +0900 Subject: [PATCH 164/253] xtensa: Mark _stext and _end as char-arrays, not single char variables Mark _stext and _end as character arrays instead of single character variables, like include/asm-generic/sections.h does. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Chris Zankel Cc: David S . Miller Cc: Francis Deslauriers Cc: Jesper Nilsson Cc: Linus Torvalds Cc: Max Filippov Cc: Mikael Starvik Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-cris-kernel@axis.com Cc: mathieu.desnoyers@efficios.com Link: http://lkml.kernel.org/r/150172775958.27216.12951305461398200544.stgit@devbox Signed-off-by: Ingo Molnar --- arch/xtensa/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 33bfa5270d95..08175df7a69e 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -273,8 +273,8 @@ void __init init_arch(bp_tag_t *bp_start) * Initialize system. Setup memory and reserve regions. */ -extern char _end; -extern char _stext; +extern char _end[]; +extern char _stext[]; extern char _WindowVectors_text_start; extern char _WindowVectors_text_end; extern char _DebugInterruptVector_literal_start; @@ -333,7 +333,7 @@ void __init setup_arch(char **cmdline_p) } #endif - mem_reserve(__pa(&_stext), __pa(&_end)); + mem_reserve(__pa(_stext), __pa(_end)); #ifdef CONFIG_VECTORS_OFFSET mem_reserve(__pa(&_WindowVectors_text_start), From c2579fee22483b0f156099abd9996d900634562c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Aug 2017 11:37:15 +0900 Subject: [PATCH 165/253] cris: Mark _stext and _end as char-arrays, not single char variables Mark _stext and _end as character arrays instead of single character variable, like include/asm-generic/sections.h does. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Chris Zankel Cc: David S . Miller Cc: Francis Deslauriers Cc: Jesper Nilsson Cc: Linus Torvalds Cc: Max Filippov Cc: Mikael Starvik Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-cris-kernel@axis.com Cc: mathieu.desnoyers@efficios.com Link: http://lkml.kernel.org/r/150172782555.27216.2805751327900543374.stgit@devbox Signed-off-by: Ingo Molnar --- arch/cris/arch-v32/mach-a3/arbiter.c | 4 ++-- arch/cris/arch-v32/mach-fs/arbiter.c | 4 ++-- arch/cris/kernel/traps.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/cris/arch-v32/mach-a3/arbiter.c b/arch/cris/arch-v32/mach-a3/arbiter.c index ab5c421a4de8..735a9b0abdb8 100644 --- a/arch/cris/arch-v32/mach-a3/arbiter.c +++ b/arch/cris/arch-v32/mach-a3/arbiter.c @@ -227,7 +227,7 @@ static void crisv32_arbiter_config(int arbiter, int region, int unused_slots) } } -extern char _stext, _etext; +extern char _stext[], _etext[]; static void crisv32_arbiter_init(void) { @@ -265,7 +265,7 @@ static void crisv32_arbiter_init(void) #ifndef CONFIG_ETRAX_KGDB /* Global watch for writes to kernel text segment. */ - crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext, + crisv32_arbiter_watch(virt_to_phys(_stext), _etext - _stext, MARB_CLIENTS(arbiter_all_clients, arbiter_bar_all_clients), arbiter_all_write, NULL); #endif diff --git a/arch/cris/arch-v32/mach-fs/arbiter.c b/arch/cris/arch-v32/mach-fs/arbiter.c index c97f4d8120f9..047c70bdbb23 100644 --- a/arch/cris/arch-v32/mach-fs/arbiter.c +++ b/arch/cris/arch-v32/mach-fs/arbiter.c @@ -158,7 +158,7 @@ static void crisv32_arbiter_config(int region, int unused_slots) } } -extern char _stext, _etext; +extern char _stext[], _etext[]; static void crisv32_arbiter_init(void) { @@ -190,7 +190,7 @@ static void crisv32_arbiter_init(void) #ifndef CONFIG_ETRAX_KGDB /* Global watch for writes to kernel text segment. */ - crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext, + crisv32_arbiter_watch(virt_to_phys(_stext), _etext - _stext, arbiter_all_clients, arbiter_all_write, NULL); #endif } diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c index a01636a12a6e..d98131c45bb5 100644 --- a/arch/cris/kernel/traps.c +++ b/arch/cris/kernel/traps.c @@ -42,7 +42,7 @@ void (*nmi_handler)(struct pt_regs *); void show_trace(unsigned long *stack) { unsigned long addr, module_start, module_end; - extern char _stext, _etext; + extern char _stext[], _etext[]; int i; pr_err("\nCall Trace: "); @@ -69,8 +69,8 @@ void show_trace(unsigned long *stack) * down the cause of the crash will be able to figure * out the call path that was taken. */ - if (((addr >= (unsigned long)&_stext) && - (addr <= (unsigned long)&_etext)) || + if (((addr >= (unsigned long)_stext) && + (addr <= (unsigned long)_etext)) || ((addr >= module_start) && (addr <= module_end))) { #ifdef CONFIG_KALLSYMS print_ip_sym(addr); From 229a71860547ec856b156179a9c6bef2de426f66 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Aug 2017 11:38:21 +0900 Subject: [PATCH 166/253] irq: Make the irqentry text section unconditional Generate irqentry and softirqentry text sections without any Kconfig dependencies. This will add extra sections, but there should be no performace impact. Suggested-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Chris Zankel Cc: David S . Miller Cc: Francis Deslauriers Cc: Jesper Nilsson Cc: Linus Torvalds Cc: Max Filippov Cc: Mikael Starvik Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-cris-kernel@axis.com Cc: mathieu.desnoyers@efficios.com Link: http://lkml.kernel.org/r/150172789110.27216.3955739126693102122.stgit@devbox Signed-off-by: Ingo Molnar --- arch/arm/include/asm/traps.h | 7 ------- arch/arm64/include/asm/traps.h | 7 ------- arch/x86/entry/entry_64.S | 9 ++------- arch/x86/kernel/unwind_frame.c | 2 -- include/asm-generic/sections.h | 4 ++++ include/asm-generic/vmlinux.lds.h | 8 -------- include/linux/interrupt.h | 14 +------------- 7 files changed, 7 insertions(+), 44 deletions(-) diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h index f555bb3664dc..683d9230984a 100644 --- a/arch/arm/include/asm/traps.h +++ b/arch/arm/include/asm/traps.h @@ -18,7 +18,6 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { extern char __irqentry_text_start[]; @@ -27,12 +26,6 @@ static inline int __in_irqentry_text(unsigned long ptr) return ptr >= (unsigned long)&__irqentry_text_start && ptr < (unsigned long)&__irqentry_text_end; } -#else -static inline int __in_irqentry_text(unsigned long ptr) -{ - return 0; -} -#endif static inline int in_exception_text(unsigned long ptr) { diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 02e9035b0685..47a9066f7c86 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -37,18 +37,11 @@ void unregister_undef_hook(struct undef_hook *hook); void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { return ptr >= (unsigned long)&__irqentry_text_start && ptr < (unsigned long)&__irqentry_text_end; } -#else -static inline int __in_irqentry_text(unsigned long ptr) -{ - return 0; -} -#endif static inline int in_exception_text(unsigned long ptr) { diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d271fb79248f..3e3da2928c44 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -675,13 +675,8 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym) #endif /* Make sure APIC interrupt handlers end up in the irqentry section: */ -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) -# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" -# define POP_SECTION_IRQENTRY .popsection -#else -# define PUSH_SECTION_IRQENTRY -# define POP_SECTION_IRQENTRY -#endif +#define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" +#define POP_SECTION_IRQENTRY .popsection .macro apicinterrupt num sym do_sym PUSH_SECTION_IRQENTRY diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index b9389d72b2f7..c29e5bc7e9c9 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -91,10 +91,8 @@ static bool in_entry_code(unsigned long ip) if (addr >= __entry_text_start && addr < __entry_text_end) return true; -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) if (addr >= __irqentry_text_start && addr < __irqentry_text_end) return true; -#endif return false; } diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 532372c6cf15..e5da44eddd2f 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -27,6 +27,8 @@ * __kprobes_text_start, __kprobes_text_end * __entry_text_start, __entry_text_end * __ctors_start, __ctors_end + * __irqentry_text_start, __irqentry_text_end + * __softirqentry_text_start, __softirqentry_text_end */ extern char _text[], _stext[], _etext[]; extern char _data[], _sdata[], _edata[]; @@ -39,6 +41,8 @@ extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __entry_text_start[], __entry_text_end[]; extern char __start_rodata[], __end_rodata[]; +extern char __irqentry_text_start[], __irqentry_text_end[]; +extern char __softirqentry_text_start[], __softirqentry_text_end[]; /* Start and end of .ctors section - used for constructor calls. */ extern char __ctors_start[], __ctors_end[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index da0be9a8d1de..62e2395f0a82 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -483,25 +483,17 @@ *(.entry.text) \ VMLINUX_SYMBOL(__entry_text_end) = .; -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) #define IRQENTRY_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__irqentry_text_start) = .; \ *(.irqentry.text) \ VMLINUX_SYMBOL(__irqentry_text_end) = .; -#else -#define IRQENTRY_TEXT -#endif -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) #define SOFTIRQENTRY_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__softirqentry_text_start) = .; \ *(.softirqentry.text) \ VMLINUX_SYMBOL(__softirqentry_text_end) = .; -#else -#define SOFTIRQENTRY_TEXT -#endif /* Section used for early init (in .S files) */ #define HEAD_TEXT *(.head.text) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a2fddddb0d60..59ba11661b6e 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -726,7 +727,6 @@ extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ @@ -734,16 +734,4 @@ extern int arch_early_irq_init(void); #define __softirq_entry \ __attribute__((__section__(".softirqentry.text"))) -/* Limits of hardirq entrypoints */ -extern char __irqentry_text_start[]; -extern char __irqentry_text_end[]; -/* Limits of softirq entrypoints */ -extern char __softirqentry_text_start[]; -extern char __softirqentry_text_end[]; - -#else -#define __irq_entry -#define __softirq_entry -#endif - #endif From d9f5f32a7d17f4906a21ad59589853639a1328a0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Aug 2017 11:39:26 +0900 Subject: [PATCH 167/253] kprobes/x86: Do not jump-optimize kprobes on irq entry code Since the kernel segment registers are not prepared at the entry of irq-entry code, if a kprobe on such code is jump-optimized, accessing per-CPU variables may cause a kernel panic. However, if the kprobe is not optimized, it triggers an int3 exception and sets segment registers correctly. With this patch we check the probe-address and if it is in the irq-entry code, it prohibits optimizing such kprobes. This means we can continue probing such interrupt handlers by kprobes but it is not optimized anymore. Reported-by: Francis Deslauriers Tested-by: Francis Deslauriers Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Chris Zankel Cc: David S . Miller Cc: Jesper Nilsson Cc: Linus Torvalds Cc: Max Filippov Cc: Mikael Starvik Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-cris-kernel@axis.com Cc: mathieu.desnoyers@efficios.com Link: http://lkml.kernel.org/r/150172795654.27216.9824039077047777477.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/opt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 69ea0bc1cfa3..4f98aad38237 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "common.h" @@ -251,10 +252,12 @@ static int can_optimize(unsigned long paddr) /* * Do not optimize in the entry code due to the unstable - * stack handling. + * stack handling and registers setup. */ - if ((paddr >= (unsigned long)__entry_text_start) && - (paddr < (unsigned long)__entry_text_end)) + if (((paddr >= (unsigned long)__entry_text_start) && + (paddr < (unsigned long)__entry_text_end)) || + ((paddr >= (unsigned long)__irqentry_text_start) && + (paddr < (unsigned long)__irqentry_text_end))) return 0; /* Check there is enough space for a relative jump. */ From 6fae8663c9940bcaa9edd8e21a9ae0f562789a3d Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 2 Aug 2017 20:12:16 +0530 Subject: [PATCH 168/253] perf scripting python: Add ppc64le to audit uname list Before patch: $ uname -m ppc64le $ ./perf script -s ./scripts/python/syscall-counts.py Install the audit-libs-python package to get syscall names. For example: # apt-get install python-audit (Ubuntu) # yum install audit-libs-python (Fedora) etc. Press control+C to stop and show the summary ^CWarning: 4 out of order events recorded. syscall events: event count ---------------------------------------- ----------- 4 504638 54 1206 221 42 55 21 3 12 167 10 11 8 6 7 125 6 5 6 108 5 162 4 90 4 45 3 33 3 311 1 246 1 238 1 93 1 91 1 After patch: ./perf script -s ./scripts/python/syscall-counts.py Press control+C to stop and show the summary ^CWarning: 5 out of order events recorded. syscall events: event count ---------------------------------------- ----------- write 643411 ioctl 1206 futex 54 fcntl 27 poll 14 read 12 execve 8 close 7 mprotect 6 open 6 nanosleep 5 fstat 5 mmap 4 inotify_add_watch 3 brk 3 access 3 timerfd_settime 1 clock_gettime 1 epoll_wait 1 ftruncate 1 munmap 1 Signed-off-by: Naveen N. Rao Acked-by: Paul Clarke Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/n/tip-bnl67p1alkvx97pn9moxz3qp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index 1d95009592eb..f6c84966e4f8 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -57,6 +57,7 @@ try: 'ia64' : audit.MACH_IA64, 'ppc' : audit.MACH_PPC, 'ppc64' : audit.MACH_PPC64, + 'ppc64le' : audit.MACH_PPC64LE, 's390' : audit.MACH_S390, 's390x' : audit.MACH_S390X, 'i386' : audit.MACH_X86, From 2862a16875452b697c65d8e06cc010c922d19171 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 27 Jul 2017 01:42:29 -0400 Subject: [PATCH 169/253] perf vendor events powerpc: remove suffix in mapfile Drop the .json suffix for events directory in the mapfile.csv. Now that we have separate JSON files for each topic in a CPU (eg: see tools/perf/pmu-events/arch/powerpc/power8/*.json) the .json suffix in the mapfile is misleading and redundant. Reported-by: Michael Ellerman Signed-off-by: Sukadev Bhattiprolu Cc: Anton Blanchard Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170802174617.GA32545@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/powerpc/mapfile.csv | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index 14318ef215f8..a0f3a11ca19f 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -13,13 +13,13 @@ # # Power8 entries -004b0000,1,power8.json,core -004b0201,1,power8.json,core -004c0000,1,power8.json,core -004d0000,1,power8.json,core -004d0100,1,power8.json,core -004d0200,1,power8.json,core -004c0100,1,power8.json,core -004e0100,1,power9.json,core -004e0200,1,power9.json,core -004e1200,1,power9.json,core +004b0000,1,power8,core +004b0201,1,power8,core +004c0000,1,power8,core +004d0000,1,power8,core +004d0100,1,power8,core +004d0200,1,power8,core +004c0100,1,power8,core +004e0100,1,power9,core +004e0200,1,power9,core +004e1200,1,power9,core From 3c22ba5243040c13f9a79e3ae70399c0ae0872a4 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 27 Jul 2017 02:23:57 -0400 Subject: [PATCH 170/253] perf vendor events powerpc: Update POWER9 events Update and cleanup POWER9 PMU events. Signed-off-by: Sukadev Bhattiprolu Cc: Anton Blanchard Cc: Jiri Olsa Cc: Michael Ellerman Link: http://lkml.kernel.org/r/20170802174617.GA32545@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power9/cache.json | 259 +- .../arch/powerpc/power9/floating-point.json | 42 +- .../arch/powerpc/power9/frontend.json | 757 ++-- .../arch/powerpc/power9/marked.json | 1373 ++++--- .../arch/powerpc/power9/memory.json | 234 +- .../pmu-events/arch/powerpc/power9/other.json | 3246 +++++++++++++---- .../arch/powerpc/power9/pipeline.json | 1113 +++--- .../pmu-events/arch/powerpc/power9/pmc.json | 239 +- .../arch/powerpc/power9/translation.json | 468 ++- 9 files changed, 4472 insertions(+), 3259 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power9/cache.json b/tools/perf/pmu-events/arch/powerpc/power9/cache.json index 437c83b7e6af..18f6645f2897 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/cache.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/cache.json @@ -1,176 +1,137 @@ [ {, - "EventCode": "0x1002A", - "EventName": "PM_CMPLU_STALL_LARX", - "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied", - "PublicDescription": "" - }, - {, - "EventCode": "0x1003C", - "EventName": "PM_CMPLU_STALL_DMISS_L2L3", - "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3", - "PublicDescription": "" - }, - {, - "EventCode": "0x14048", - "EventName": "PM_INST_FROM_ON_CHIP_CACHE", - "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E054", - "EventName": "PM_LD_MISS_L1", - "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load.", - "PublicDescription": "" - }, - {, - "EventCode": "0x400F0", - "EventName": "PM_LD_MISS_L1", - "BriefDescription": "Load Missed L1, at execution time (not gated by finish, which means this counter can be greater than loads finished)", - "PublicDescription": "" - }, - {, - "EventCode": "0x1404A", - "EventName": "PM_INST_FROM_RL2L3_SHR", - "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C058", - "EventName": "PM_DTLB_MISS_16G", - "BriefDescription": "Data TLB Miss page size 16G", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D15C", - "EventName": "PM_MRK_DTLB_MISS_1G", - "BriefDescription": "Marked Data TLB reload (after a miss) page size 2M. Implies radix translation was used", - "PublicDescription": "" + "EventCode": "0x300F4", + "EventName": "PM_THRD_CONC_RUN_INST", + "BriefDescription": "PPC Instructions Finished by this thread when all threads in the core had the run-latch set" }, {, "EventCode": "0x1E056", "EventName": "PM_CMPLU_STALL_FLUSH_ANY_THREAD", - "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion", - "PublicDescription": "" - }, - {, - "EventCode": "0x101E6", - "EventName": "PM_THRESH_EXC_4096", - "BriefDescription": "Threshold counter exceed a count of 4096", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C01A", - "EventName": "PM_CMPLU_STALL_LHS", - "BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D016", - "EventName": "PM_CMPLU_STALL_FXU", - "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes", - "PublicDescription": "" - }, - {, - "EventCode": "0x24046", - "EventName": "PM_INST_FROM_RL2L3_MOD", - "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2404A", - "EventName": "PM_INST_FROM_RL4", - "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2F140", - "EventName": "PM_MRK_DPTEG_FROM_L2_MEPF", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D15E", - "EventName": "PM_MRK_DTLB_MISS_16G", - "BriefDescription": "Marked Data TLB Miss page size 16G", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F14A", - "EventName": "PM_MRK_DPTEG_FROM_RMEM", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D156", - "EventName": "PM_MRK_DTLB_MISS_64K", - "BriefDescription": "Marked Data TLB Miss page size 64K", - "PublicDescription": "" - }, - {, - "EventCode": "0x3006C", - "EventName": "PM_RUN_CYC_SMT2_MODE", - "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT2 mode", - "PublicDescription": "" - }, - {, - "EventCode": "0x300F4", - "EventName": "PM_THRD_CONC_RUN_INST", - "BriefDescription": "PPC Instructions Finished by this thread when all threads in the core had the run-latch set", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C014", - "EventName": "PM_CMPLU_STALL_LMQ_FULL", - "BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C016", - "EventName": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT", - "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D014", - "EventName": "PM_CMPLU_STALL_LOAD_FINISH", - "BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish", - "PublicDescription": "" + "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion" }, {, "EventCode": "0x4D016", "EventName": "PM_CMPLU_STALL_FXLONG", - "BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)", - "PublicDescription": "" + "BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)" + }, + {, + "EventCode": "0x2D016", + "EventName": "PM_CMPLU_STALL_FXU", + "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes" + }, + {, + "EventCode": "0x1D15C", + "EventName": "PM_MRK_DTLB_MISS_1G", + "BriefDescription": "Marked Data TLB reload (after a miss) page size 2M. Implies radix translation was used" }, {, "EventCode": "0x4D12A", "EventName": "PM_MRK_DATA_FROM_RL4_CYC", - "BriefDescription": "Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load", - "PublicDescription": "" + "BriefDescription": "Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load" }, {, - "EventCode": "0x4C15E", - "EventName": "PM_MRK_DTLB_MISS_16M", - "BriefDescription": "Marked Data TLB Miss page size 16M", - "PublicDescription": "" + "EventCode": "0x1003C", + "EventName": "PM_CMPLU_STALL_DMISS_L2L3", + "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3" }, {, - "EventCode": "0x401E4", - "EventName": "PM_MRK_DTLB_MISS", - "BriefDescription": "Marked dtlb miss", - "PublicDescription": "" + "EventCode": "0x4C014", + "EventName": "PM_CMPLU_STALL_LMQ_FULL", + "BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full" + }, + {, + "EventCode": "0x14048", + "EventName": "PM_INST_FROM_ON_CHIP_CACHE", + "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x4D014", + "EventName": "PM_CMPLU_STALL_LOAD_FINISH", + "BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish" + }, + {, + "EventCode": "0x2404A", + "EventName": "PM_INST_FROM_RL4", + "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x1404A", + "EventName": "PM_INST_FROM_RL2L3_SHR", + "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)" }, {, "EventCode": "0x401EA", "EventName": "PM_THRESH_EXC_128", - "BriefDescription": "Threshold counter exceeded a value of 128", - "PublicDescription": "" + "BriefDescription": "Threshold counter exceeded a value of 128" }, {, "EventCode": "0x400F6", "EventName": "PM_BR_MPRED_CMPL", - "BriefDescription": "Number of Branch Mispredicts", - "PublicDescription": "" + "BriefDescription": "Number of Branch Mispredicts" + }, + {, + "EventCode": "0x2F140", + "EventName": "PM_MRK_DPTEG_FROM_L2_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x101E6", + "EventName": "PM_THRESH_EXC_4096", + "BriefDescription": "Threshold counter exceed a count of 4096" + }, + {, + "EventCode": "0x3D156", + "EventName": "PM_MRK_DTLB_MISS_64K", + "BriefDescription": "Marked Data TLB Miss page size 64K" + }, + {, + "EventCode": "0x4C15E", + "EventName": "PM_MRK_DTLB_MISS_16M", + "BriefDescription": "Marked Data TLB Miss page size 16M" + }, + {, + "EventCode": "0x2D15E", + "EventName": "PM_MRK_DTLB_MISS_16G", + "BriefDescription": "Marked Data TLB Miss page size 16G" + }, + {, + "EventCode": "0x3F14A", + "EventName": "PM_MRK_DPTEG_FROM_RMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4C016", + "EventName": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT", + "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict" + }, + {, + "EventCode": "0x2C01A", + "EventName": "PM_CMPLU_STALL_LHS", + "BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data" + }, + {, + "EventCode": "0x401E4", + "EventName": "PM_MRK_DTLB_MISS", + "BriefDescription": "Marked dtlb miss" + }, + {, + "EventCode": "0x24046", + "EventName": "PM_INST_FROM_RL2L3_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x1002A", + "EventName": "PM_CMPLU_STALL_LARX", + "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied" + }, + {, + "EventCode": "0x3006C", + "EventName": "PM_RUN_CYC_SMT2_MODE", + "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT2 mode" + }, + {, + "EventCode": "0x1C058", + "EventName": "PM_DTLB_MISS_16G", + "BriefDescription": "Data TLB Miss page size 16G" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/floating-point.json b/tools/perf/pmu-events/arch/powerpc/power9/floating-point.json index d4e4669c1cf3..8a83bca26552 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/floating-point.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/floating-point.json @@ -1,44 +1,32 @@ [ - {, - "EventCode": "0x10058", - "EventName": "PM_MEM_LOC_THRESH_IFU", - "BriefDescription": "Local Memory above threshold for IFU speculation control", - "PublicDescription": "" - }, - {, - "EventCode": "0x4505E", - "EventName": "PM_FLOP_CMPL", - "BriefDescription": "Floating Point Operation Finished", - "PublicDescription": "" - }, {, "EventCode": "0x1415A", "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC", - "BriefDescription": "Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load", - "PublicDescription": "" + "BriefDescription": "Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load" + }, + {, + "EventCode": "0x10058", + "EventName": "PM_MEM_LOC_THRESH_IFU", + "BriefDescription": "Local Memory above threshold for IFU speculation control" }, {, "EventCode": "0x2D028", "EventName": "PM_RADIX_PWC_L2_PDE_FROM_L2", - "BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L2 data cache", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D154", - "EventName": "PM_MRK_DERAT_MISS_64K", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 64K", - "PublicDescription": "" + "BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L2 data cache" }, {, "EventCode": "0x30012", "EventName": "PM_FLUSH_COMPLETION", - "BriefDescription": "The instruction that was next to complete did not complete because it suffered a flush", - "PublicDescription": "" + "BriefDescription": "The instruction that was next to complete did not complete because it suffered a flush" + }, + {, + "EventCode": "0x2D154", + "EventName": "PM_MRK_DERAT_MISS_64K", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 64K" }, {, "EventCode": "0x4016E", "EventName": "PM_THRESH_NOT_MET", - "BriefDescription": "Threshold counter did not meet threshold", - "PublicDescription": "" + "BriefDescription": "Threshold counter did not meet threshold" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json index 5da59d15af94..7e62c46d7a20 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json @@ -1,446 +1,377 @@ [ {, - "EventCode": "0x20036", - "EventName": "PM_BR_2PATH", - "BriefDescription": "Branches that are not strongly biased", - "PublicDescription": "" + "EventCode": "0x3E15C", + "EventName": "PM_MRK_L2_TM_ST_ABORT_SISTER", + "BriefDescription": "TM marked store abort for this thread" }, {, - "EventCode": "0x40036", - "EventName": "PM_BR_2PATH", - "BriefDescription": "Branches that are not strongly biased", - "PublicDescription": "" - }, - {, - "EventCode": "0x10004", - "EventName": "PM_CMPLU_STALL_LRQ_OTHER", - "BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others", - "PublicDescription": "" - }, - {, - "EventCode": "0x10010", - "EventName": "PM_PMC4_OVERFLOW", - "BriefDescription": "Overflow from counter 4", - "PublicDescription": "" - }, - {, - "EventCode": "0x1001A", - "EventName": "PM_LSU_SRQ_FULL_CYC", - "BriefDescription": "Cycles in which the Store Queue is full on all 4 slices. This is event is not per thread. All the threads will see the same count for this core resource", - "PublicDescription": "" - }, - {, - "EventCode": "0x10020", - "EventName": "PM_PMC4_REWIND", - "BriefDescription": "PMC4 Rewind Event", - "PublicDescription": "" - }, - {, - "EventCode": "0x1003A", - "EventName": "PM_CMPLU_STALL_LSU_FIN", - "BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish", - "PublicDescription": "" - }, - {, - "EventCode": "0x1013E", - "EventName": "PM_MRK_LD_MISS_EXPOSED_CYC", - "BriefDescription": "Marked Load exposed Miss (use edge detect to count #)", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C044", - "EventName": "PM_DATA_FROM_L3_NO_CONFLICT", - "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x15044", - "EventName": "PM_IPTEG_FROM_L3_NO_CONFLICT", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x15046", - "EventName": "PM_IPTEG_FROM_L3.1_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x1015E", - "EventName": "PM_MRK_FAB_RSP_RD_T_INTV", - "BriefDescription": "Sampled Read got a T intervention", - "PublicDescription": "" - }, - {, - "EventCode": "0x14054", - "EventName": "PM_INST_PUMP_CPRED", - "BriefDescription": "Pump prediction correct. Counts across all types of pumps for an instruction fetch", - "PublicDescription": "" - }, - {, - "EventCode": "0x15152", - "EventName": "PM_SYNC_MRK_BR_LINK", - "BriefDescription": "Marked Branch and link branch that can cause a synchronous interrupt", - "PublicDescription": "" - }, - {, - "EventCode": "0x1515C", - "EventName": "PM_SYNC_MRK_BR_MPRED", - "BriefDescription": "Marked Branch mispredict that can cause a synchronous interrupt", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E050", - "EventName": "PM_CMPLU_STALL_TEND", - "BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E15E", - "EventName": "PM_MRK_L2_TM_REQ_ABORT", - "BriefDescription": "TM abort", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F054", - "EventName": "PM_TLB_HIT", - "BriefDescription": "Number of times the TLB had the data required by the instruction. Applies to both HPT and RPT", - "PublicDescription": "" - }, - {, - "EventCode": "0x1006A", - "EventName": "PM_NTC_ISSUE_HELD_DARQ_FULL", - "BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it", - "PublicDescription": "" + "EventCode": "0x25044", + "EventName": "PM_IPTEG_FROM_L31_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a instruction side request" }, {, "EventCode": "0x101E8", "EventName": "PM_THRESH_EXC_256", - "BriefDescription": "Threshold counter exceed a count of 256", - "PublicDescription": "" - }, - {, - "EventCode": "0x101EC", - "EventName": "PM_THRESH_MET", - "BriefDescription": "threshold exceeded", - "PublicDescription": "" - }, - {, - "EventCode": "0x100F2", - "EventName": "PM_1PLUS_PPC_CMPL", - "BriefDescription": "1 or more ppc insts finished", - "PublicDescription": "" - }, - {, - "EventCode": "0x20114", - "EventName": "PM_MRK_L2_RC_DISP", - "BriefDescription": "Marked Instruction RC dispatched in L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C010", - "EventName": "PM_CMPLU_STALL_LSU", - "BriefDescription": "Completion stall by LSU instruction", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C014", - "EventName": "PM_CMPLU_STALL_STORE_FINISH", - "BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C01E", - "EventName": "PM_CMPLU_STALL_SYNC_PMU_INT", - "BriefDescription": "Cycles in which the NTC instruction is waiting for a synchronous PMU interrupt", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D01C", - "EventName": "PM_CMPLU_STALL_STCX", - "BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E01A", - "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT", - "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C124", - "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER", - "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C042", - "EventName": "PM_DATA_FROM_L3_MEPF", - "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D14C", - "EventName": "PM_MRK_DATA_FROM_L3.1_ECO_SHR", - "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x25042", - "EventName": "PM_IPTEG_FROM_L3_MEPF", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x25044", - "EventName": "PM_IPTEG_FROM_L3.1_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x2015E", - "EventName": "PM_MRK_FAB_RSP_RWITM_RTY", - "BriefDescription": "Sampled store did a rwitm and got a rty", - "PublicDescription": "" - }, - {, - "EventCode": "0x24050", - "EventName": "PM_IOPS_CMPL", - "BriefDescription": "Internal Operations completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x24154", - "EventName": "PM_THRESH_ACC", - "BriefDescription": "This event increments every time the threshold event counter ticks. Thresholding must be enabled (via MMCRA) and the thresholding start event must occur for this counter to increment. It will stop incrementing when the thresholding stop event occurs or when thresholding is disabled, until the next time a configured thresholding start event occurs.", - "PublicDescription": "" - }, - {, - "EventCode": "0x2F152", - "EventName": "PM_MRK_FAB_RSP_DCLAIM_CYC", - "BriefDescription": "cycles L2 RC took for a dclaim", - "PublicDescription": "" - }, - {, - "EventCode": "0x200FA", - "EventName": "PM_BR_TAKEN_CMPL", - "BriefDescription": "New event for Branch Taken", - "PublicDescription": "" - }, - {, - "EventCode": "0x30014", - "EventName": "PM_CMPLU_STALL_STORE_FIN_ARB", - "BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe", - "PublicDescription": "" - }, - {, - "EventCode": "0x3001C", - "EventName": "PM_LSU_REJECT_LMQ_FULL", - "BriefDescription": "LSU Reject due to LMQ full (up to 4 per cycles)", - "PublicDescription": "" - }, - {, - "EventCode": "0x30026", - "EventName": "PM_CMPLU_STALL_STORE_DATA", - "BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data", - "PublicDescription": "" - }, - {, - "EventCode": "0x3012A", - "EventName": "PM_MRK_L2_RC_DONE", - "BriefDescription": "Marked RC done", - "PublicDescription": "" - }, - {, - "EventCode": "0x35044", - "EventName": "PM_IPTEG_FROM_L3.1_ECO_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E04A", - "EventName": "PM_DPTEG_FROM_RMEM", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x30154", - "EventName": "PM_MRK_FAB_RSP_DCLAIM", - "BriefDescription": "Marked store had to do a dclaim", - "PublicDescription": "" - }, - {, - "EventCode": "0x3015E", - "EventName": "PM_MRK_FAB_RSP_CLAIM_RTY", - "BriefDescription": "Sampled store did a rwitm and got a rty", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C056", - "EventName": "PM_DTLB_MISS_64K", - "BriefDescription": "Data TLB Miss page size 64K", - "PublicDescription": "" - }, - {, - "EventCode": "0x34050", - "EventName": "PM_INST_SYS_PUMP_CPRED", - "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for an instruction fetch", - "PublicDescription": "" - }, - {, - "EventCode": "0x34052", - "EventName": "PM_INST_SYS_PUMP_MPRED", - "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for an instruction fetch", - "PublicDescription": "" - }, - {, - "EventCode": "0x34056", - "EventName": "PM_CMPLU_STALL_LSU_MFSPR", - "BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned", - "PublicDescription": "" - }, - {, - "EventCode": "0x3515A", - "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC", - "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3515C", - "EventName": "PM_MRK_DATA_FROM_RL4", - "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E15C", - "EventName": "PM_MRK_L2_TM_ST_ABORT_SISTER", - "BriefDescription": "TM marked store abort for this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x30060", - "EventName": "PM_TM_TRANS_RUN_INST", - "BriefDescription": "Run instructions completed in transactional state (gated by the run latch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x301E6", - "EventName": "PM_MRK_DERAT_MISS", - "BriefDescription": "Erat Miss (TLB Access) All page sizes", - "PublicDescription": "" - }, - {, - "EventCode": "0x301EA", - "EventName": "PM_THRESH_EXC_1024", - "BriefDescription": "Threshold counter exceeded a value of 1024", - "PublicDescription": "" - }, - {, - "EventCode": "0x300FA", - "EventName": "PM_INST_FROM_L3MISS", - "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet", - "PublicDescription": "" - }, - {, - "EventCode": "0x40116", - "EventName": "PM_MRK_LARX_FIN", - "BriefDescription": "Larx finished", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C010", - "EventName": "PM_CMPLU_STALL_STORE_PIPE_ARB", - "BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. This means the instruction is ready to relaunch and tried once but lost arbitration", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C01C", - "EventName": "PM_CMPLU_STALL_ST_FWD", - "BriefDescription": "Completion stall due to store forward", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E012", - "EventName": "PM_CMPLU_STALL_MTFPSCR", - "BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E016", - "EventName": "PM_CMPLU_STALL_LSAQ_ARB", - "BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C12A", - "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC", - "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C044", - "EventName": "PM_DATA_FROM_L3.1_ECO_MOD", - "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x45044", - "EventName": "PM_IPTEG_FROM_L3.1_ECO_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x45048", - "EventName": "PM_IPTEG_FROM_DL2L3_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request", - "PublicDescription": "" + "BriefDescription": "Threshold counter exceed a count of 256" }, {, "EventCode": "0x4504E", "EventName": "PM_IPTEG_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request", - "PublicDescription": "" + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request" }, {, - "EventCode": "0x4E042", - "EventName": "PM_DPTEG_FROM_L3", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" + "EventCode": "0x1006A", + "EventName": "PM_NTC_ISSUE_HELD_DARQ_FULL", + "BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it" }, {, - "EventCode": "0x4015E", - "EventName": "PM_MRK_FAB_RSP_RD_RTY", - "BriefDescription": "Sampled L2 reads retry count", - "PublicDescription": "" + "EventCode": "0x4E016", + "EventName": "PM_CMPLU_STALL_LSAQ_ARB", + "BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch" }, {, - "EventCode": "0x4C056", - "EventName": "PM_DTLB_MISS_16M", - "BriefDescription": "Data TLB Miss page size 16M", - "PublicDescription": "" + "EventCode": "0x1001A", + "EventName": "PM_LSU_SRQ_FULL_CYC", + "BriefDescription": "Cycles in which the Store Queue is full on all 4 slices. This is event is not per thread. All the threads will see the same count for this core resource" + }, + {, + "EventCode": "0x1E15E", + "EventName": "PM_MRK_L2_TM_REQ_ABORT", + "BriefDescription": "TM abort" + }, + {, + "EventCode": "0x34052", + "EventName": "PM_INST_SYS_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for an instruction fetch" + }, + {, + "EventCode": "0x20114", + "EventName": "PM_MRK_L2_RC_DISP", + "BriefDescription": "Marked Instruction RC dispatched in L2" + }, + {, + "EventCode": "0x4C044", + "EventName": "PM_DATA_FROM_L31_ECO_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a demand load" + }, + {, + "EventCode": "0x1C044", + "EventName": "PM_DATA_FROM_L3_NO_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a demand load" }, {, "EventCode": "0x44050", "EventName": "PM_INST_SYS_PUMP_MPRED_RTY", - "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for an instruction fetch", - "PublicDescription": "" + "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for an instruction fetch" }, {, - "EventCode": "0x44052", - "EventName": "PM_INST_PUMP_MPRED", - "BriefDescription": "Pump misprediction. Counts across all types of pumps for an instruction fetch", - "PublicDescription": "" + "EventCode": "0x30154", + "EventName": "PM_MRK_FAB_RSP_DCLAIM", + "BriefDescription": "Marked store had to do a dclaim" + }, + {, + "EventCode": "0x30014", + "EventName": "PM_CMPLU_STALL_STORE_FIN_ARB", + "BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe" + }, + {, + "EventCode": "0x3E054", + "EventName": "PM_LD_MISS_L1", + "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." + }, + {, + "EventCode": "0x400F0", + "EventName": "PM_LD_MISS_L1", + "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." + }, + {, + "EventCode": "0x2E01A", + "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT", + "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete" + }, + {, + "EventCode": "0x2D01C", + "EventName": "PM_CMPLU_STALL_STCX", + "BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2" + }, + {, + "EventCode": "0x2C010", + "EventName": "PM_CMPLU_STALL_LSU", + "BriefDescription": "Completion stall by LSU instruction" + }, + {, + "EventCode": "0x2C042", + "EventName": "PM_DATA_FROM_L3_MEPF", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to a demand load" + }, + {, + "EventCode": "0x4E012", + "EventName": "PM_CMPLU_STALL_MTFPSCR", + "BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)" + }, + {, + "EventCode": "0x100F2", + "EventName": "PM_1PLUS_PPC_CMPL", + "BriefDescription": "1 or more ppc insts finished" + }, + {, + "EventCode": "0x3001C", + "EventName": "PM_LSU_REJECT_LMQ_FULL", + "BriefDescription": "LSU Reject due to LMQ full (up to 4 per cycles)" + }, + {, + "EventCode": "0x15046", + "EventName": "PM_IPTEG_FROM_L31_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a instruction side request" + }, + {, + "EventCode": "0x1015E", + "EventName": "PM_MRK_FAB_RSP_RD_T_INTV", + "BriefDescription": "Sampled Read got a T intervention" + }, + {, + "EventCode": "0x101EC", + "EventName": "PM_THRESH_MET", + "BriefDescription": "threshold exceeded" + }, + {, + "EventCode": "0x10020", + "EventName": "PM_PMC4_REWIND", + "BriefDescription": "PMC4 Rewind Event" + }, + {, + "EventCode": "0x301EA", + "EventName": "PM_THRESH_EXC_1024", + "BriefDescription": "Threshold counter exceeded a value of 1024" + }, + {, + "EventCode": "0x34056", + "EventName": "PM_CMPLU_STALL_LSU_MFSPR", + "BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned" }, {, "EventCode": "0x44056", "EventName": "PM_VECTOR_ST_CMPL", - "BriefDescription": "Number of vector store instructions completed", - "PublicDescription": "" + "BriefDescription": "Number of vector store instructions completed" + }, + {, + "EventCode": "0x2C124", + "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a marked load" + }, + {, + "EventCode": "0x4C12A", + "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC", + "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load" + }, + {, + "EventCode": "0x3C056", + "EventName": "PM_DTLB_MISS_64K", + "BriefDescription": "Data TLB Miss page size 64K" + }, + {, + "EventCode": "0x30060", + "EventName": "PM_TM_TRANS_RUN_INST", + "BriefDescription": "Run instructions completed in transactional state (gated by the run latch)" + }, + {, + "EventCode": "0x2C014", + "EventName": "PM_CMPLU_STALL_STORE_FINISH", + "BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish" + }, + {, + "EventCode": "0x3515A", + "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC", + "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on the same chip due to a marked load" + }, + {, + "EventCode": "0x34050", + "EventName": "PM_INST_SYS_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for an instruction fetch" + }, + {, + "EventCode": "0x3015E", + "EventName": "PM_MRK_FAB_RSP_CLAIM_RTY", + "BriefDescription": "Sampled store did a rwitm and got a rty" + }, + {, + "EventCode": "0x0", + "EventName": "PM_SUSPENDED", + "BriefDescription": "Counter OFF" + }, + {, + "EventCode": "0x10010", + "EventName": "PM_PMC4_OVERFLOW", + "BriefDescription": "Overflow from counter 4" + }, + {, + "EventCode": "0x3E04A", + "EventName": "PM_DPTEG_FROM_RMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x2F152", + "EventName": "PM_MRK_FAB_RSP_DCLAIM_CYC", + "BriefDescription": "cycles L2 RC took for a dclaim" + }, + {, + "EventCode": "0x10004", + "EventName": "PM_CMPLU_STALL_LRQ_OTHER", + "BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others" }, {, "EventCode": "0x4F150", "EventName": "PM_MRK_FAB_RSP_RWITM_CYC", - "BriefDescription": "cycles L2 RC took for a rwitm", - "PublicDescription": "" + "BriefDescription": "cycles L2 RC took for a rwitm" + }, + {, + "EventCode": "0x4E042", + "EventName": "PM_DPTEG_FROM_L3", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x1F054", + "EventName": "PM_TLB_HIT", + "BriefDescription": "Number of times the TLB had the data required by the instruction. Applies to both HPT and RPT" + }, + {, + "EventCode": "0x2C01E", + "EventName": "PM_CMPLU_STALL_SYNC_PMU_INT", + "BriefDescription": "Cycles in which the NTC instruction is waiting for a synchronous PMU interrupt" + }, + {, + "EventCode": "0x24050", + "EventName": "PM_IOPS_CMPL", + "BriefDescription": "Internal Operations completed" + }, + {, + "EventCode": "0x1515C", + "EventName": "PM_SYNC_MRK_BR_MPRED", + "BriefDescription": "Marked Branch mispredict that can cause a synchronous interrupt" + }, + {, + "EventCode": "0x300FA", + "EventName": "PM_INST_FROM_L3MISS", + "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet" + }, + {, + "EventCode": "0x15044", + "EventName": "PM_IPTEG_FROM_L3_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a instruction side request" + }, + {, + "EventCode": "0x15152", + "EventName": "PM_SYNC_MRK_BR_LINK", + "BriefDescription": "Marked Branch and link branch that can cause a synchronous interrupt" + }, + {, + "EventCode": "0x1E050", + "EventName": "PM_CMPLU_STALL_TEND", + "BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2" + }, + {, + "EventCode": "0x1013E", + "EventName": "PM_MRK_LD_MISS_EXPOSED_CYC", + "BriefDescription": "Marked Load exposed Miss (use edge detect to count #)" + }, + {, + "EventCode": "0x25042", + "EventName": "PM_IPTEG_FROM_L3_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a instruction side request" + }, + {, + "EventCode": "0x14054", + "EventName": "PM_INST_PUMP_CPRED", + "BriefDescription": "Pump prediction correct. Counts across all types of pumps for an instruction fetch" + }, + {, + "EventCode": "0x4015E", + "EventName": "PM_MRK_FAB_RSP_RD_RTY", + "BriefDescription": "Sampled L2 reads retry count" + }, + {, + "EventCode": "0x45048", + "EventName": "PM_IPTEG_FROM_DL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request" + }, + {, + "EventCode": "0x44052", + "EventName": "PM_INST_PUMP_MPRED", + "BriefDescription": "Pump misprediction. Counts across all types of pumps for an instruction fetch" + }, + {, + "EventCode": "0x30026", + "EventName": "PM_CMPLU_STALL_STORE_DATA", + "BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data" + }, + {, + "EventCode": "0x301E6", + "EventName": "PM_MRK_DERAT_MISS", + "BriefDescription": "Erat Miss (TLB Access) All page sizes" + }, + {, + "EventCode": "0x24154", + "EventName": "PM_THRESH_ACC", + "BriefDescription": "This event increments every time the threshold event counter ticks. Thresholding must be enabled (via MMCRA) and the thresholding start event must occur for this counter to increment. It will stop incrementing when the thresholding stop event occurs or when thresholding is disabled, until the next time a configured thresholding start event occurs." + }, + {, + "EventCode": "0x2015E", + "EventName": "PM_MRK_FAB_RSP_RWITM_RTY", + "BriefDescription": "Sampled store did a rwitm and got a rty" + }, + {, + "EventCode": "0x200FA", + "EventName": "PM_BR_TAKEN_CMPL", + "BriefDescription": "New event for Branch Taken" + }, + {, + "EventCode": "0x35044", + "EventName": "PM_IPTEG_FROM_L31_ECO_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a instruction side request" + }, + {, + "EventCode": "0x4C010", + "EventName": "PM_CMPLU_STALL_STORE_PIPE_ARB", + "BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. This means the instruction is ready to relaunch and tried once but lost arbitration" + }, + {, + "EventCode": "0x4C01C", + "EventName": "PM_CMPLU_STALL_ST_FWD", + "BriefDescription": "Completion stall due to store forward" + }, + {, + "EventCode": "0x3515C", + "EventName": "PM_MRK_DATA_FROM_RL4", + "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a marked load" + }, + {, + "EventCode": "0x2D14C", + "EventName": "PM_MRK_DATA_FROM_L31_ECO_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a marked load" + }, + {, + "EventCode": "0x40116", + "EventName": "PM_MRK_LARX_FIN", + "BriefDescription": "Larx finished" + }, + {, + "EventCode": "0x4C056", + "EventName": "PM_DTLB_MISS_16M", + "BriefDescription": "Data TLB Miss page size 16M" + }, + {, + "EventCode": "0x1003A", + "EventName": "PM_CMPLU_STALL_LSU_FIN", + "BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish" + }, + {, + "EventCode": "0x3012A", + "EventName": "PM_MRK_L2_RC_DONE", + "BriefDescription": "Marked RC done" + }, + {, + "EventCode": "0x45044", + "EventName": "PM_IPTEG_FROM_L31_ECO_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/marked.json b/tools/perf/pmu-events/arch/powerpc/power9/marked.json index e4d673235830..b9df54fb37e3 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/marked.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/marked.json @@ -1,782 +1,647 @@ [ {, - "EventCode": "0x1002C", - "EventName": "PM_L1_DCACHE_RELOADED_ALL", - "BriefDescription": "L1 data cache reloaded for demand. If MMCR1[16] is 1, prefetches will be included as well", - "PublicDescription": "" - }, - {, - "EventCode": "0x10132", - "EventName": "PM_MRK_INST_ISSUED", - "BriefDescription": "Marked instruction issued", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C042", - "EventName": "PM_DATA_FROM_L2", - "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C046", - "EventName": "PM_DATA_FROM_L3.1_SHR", - "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C048", - "EventName": "PM_DATA_FROM_ON_CHIP_CACHE", - "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x14040", - "EventName": "PM_INST_FROM_L2_NO_CONFLICT", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x14042", - "EventName": "PM_INST_FROM_L2", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x14046", - "EventName": "PM_INST_FROM_L3.1_SHR", - "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x1404C", - "EventName": "PM_INST_FROM_LL4", - "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D14C", - "EventName": "PM_MRK_DATA_FROM_LL4", - "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x15042", - "EventName": "PM_IPTEG_FROM_L2", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x1504E", - "EventName": "PM_IPTEG_FROM_L2MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E042", - "EventName": "PM_DPTEG_FROM_L2", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E044", - "EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E046", - "EventName": "PM_DPTEG_FROM_L3.1_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F14A", - "EventName": "PM_MRK_DPTEG_FROM_RL2L3_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F14C", - "EventName": "PM_MRK_DPTEG_FROM_LL4", - "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1005C", - "EventName": "PM_CMPLU_STALL_DP", - "BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by NOT vector", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C052", - "EventName": "PM_DATA_GRP_PUMP_MPRED_RTY", - "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C054", - "EventName": "PM_DATA_PUMP_CPRED", - "BriefDescription": "Pump prediction correct. Counts across all types of pumps for a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C05E", - "EventName": "PM_MEM_LOC_THRESH_LSU_MED", - "BriefDescription": "Local memory above threshold for data prefetch", - "PublicDescription": "" - }, - {, - "EventCode": "0x1415E", - "EventName": "PM_MRK_DATA_FROM_L3MISS_CYC", - "BriefDescription": "Duration in cycles to reload from a location other than the local core's L3 due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D058", - "EventName": "PM_DARQ0_10_12_ENTRIES", - "BriefDescription": "Cycles in which 10 or more DARQ entries (out of 12) are in use", - "PublicDescription": "" - }, - {, - "EventCode": "0x15150", - "EventName": "PM_SYNC_MRK_PROBE_NOP", - "BriefDescription": "Marked probeNops which can cause synchronous interrupts", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E052", - "EventName": "PM_CMPLU_STALL_SLB", - "BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F150", - "EventName": "PM_MRK_ST_L2DISP_TO_CMPL_CYC", - "BriefDescription": "cycles from L2 rc disp to l2 rc completion", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F05A", - "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L2", - "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L2 data cache. This is the deepest level of PWC possible for a translation", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F05C", - "EventName": "PM_RADIX_PWC_L3_PDE_FROM_L3", - "BriefDescription": "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L3 data cache", - "PublicDescription": "" - }, - {, - "EventCode": "0x1006C", - "EventName": "PM_RUN_CYC_ST_MODE", - "BriefDescription": "Cycles run latch is set and core is in ST mode", - "PublicDescription": "" - }, - {, - "EventCode": "0x1016E", - "EventName": "PM_MRK_BR_CMPL", - "BriefDescription": "Branch Instruction completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x101E0", - "EventName": "PM_MRK_INST_DISP", - "BriefDescription": "The thread has dispatched a randomly sampled marked instruction", - "PublicDescription": "" - }, - {, - "EventCode": "0x101E2", - "EventName": "PM_MRK_BR_TAKEN_CMPL", - "BriefDescription": "Marked Branch Taken completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C016", - "EventName": "PM_CMPLU_STALL_PASTE", - "BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C01C", - "EventName": "PM_CMPLU_STALL_DMISS_REMOTE", - "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E01E", - "EventName": "PM_CMPLU_STALL_NTC_FLUSH", - "BriefDescription": "Completion stall due to ntc flush", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C128", - "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC", - "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C12E", - "EventName": "PM_MRK_DATA_FROM_LL4_CYC", - "BriefDescription": "Duration in cycles to reload from the local chip's L4 cache due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D024", - "EventName": "PM_RADIX_PWC_L2_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB but hit on both the first and second levels of page walk cache.", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D02A", - "EventName": "PM_RADIX_PWC_L3_PDE_FROM_L2", - "BriefDescription": "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L2 data cache", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D02E", - "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L2", - "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L2 data cache. This implies that a level 4 PWC access was not necessary for this translation", - "PublicDescription": "" - }, - {, - "EventCode": "0x20130", - "EventName": "PM_MRK_INST_DECODED", - "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only", - "PublicDescription": "" - }, - {, - "EventCode": "0x20138", - "EventName": "PM_MRK_ST_NEST", - "BriefDescription": "Marked store sent to nest", - "PublicDescription": "" - }, - {, - "EventCode": "0x2013A", - "EventName": "PM_MRK_BRU_FIN", - "BriefDescription": "bru marked instr finish", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C044", - "EventName": "PM_DATA_FROM_L3.1_MOD", - "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C048", - "EventName": "PM_DATA_FROM_LMEM", - "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C04A", - "EventName": "PM_DATA_FROM_RL4", - "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x24044", - "EventName": "PM_INST_FROM_L3.1_MOD", - "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x25040", - "EventName": "PM_IPTEG_FROM_L2_MEPF", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E044", - "EventName": "PM_DPTEG_FROM_L3.1_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E048", - "EventName": "PM_DPTEG_FROM_LMEM", - "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2F148", - "EventName": "PM_MRK_DPTEG_FROM_LMEM", - "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x20050", - "EventName": "PM_GRP_PUMP_CPRED", - "BriefDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C052", - "EventName": "PM_DATA_GRP_PUMP_MPRED", - "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C058", - "EventName": "PM_MEM_PREF", - "BriefDescription": "Memory prefetch for this thread. Includes L4", - "PublicDescription": "" - }, - {, - "EventCode": "0x24156", - "EventName": "PM_MRK_STCX_FIN", - "BriefDescription": "Number of marked stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed", - "PublicDescription": "" - }, - {, - "EventCode": "0x24158", - "EventName": "PM_MRK_INST", - "BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E050", - "EventName": "PM_DARQ0_7_9_ENTRIES", - "BriefDescription": "Cycles in which 7,8, or 9 DARQ entries (out of 12) are in use", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E05E", - "EventName": "PM_LMQ_EMPTY_CYC", - "BriefDescription": "Cycles in which the LMQ has no pending load misses for this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x200FD", - "EventName": "PM_L1_ICACHE_MISS", - "BriefDescription": "Demand iCache Miss", - "PublicDescription": "" - }, - {, - "EventCode": "0x30006", - "EventName": "PM_CMPLU_STALL_OTHER_CMPL", - "BriefDescription": "Instructions the core completed while this tread was stalled", - "PublicDescription": "" - }, - {, - "EventCode": "0x30008", - "EventName": "PM_DISP_STARVED", - "BriefDescription": "Dispatched Starved", - "PublicDescription": "" - }, - {, - "EventCode": "0x3000A", - "EventName": "PM_CMPLU_STALL_PM", - "BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish. Includes permute and decimal fixed point instructions (128 bit BCD arithmetic) + a few 128 bit fixpoint add/subtract instructions with carry. Not qualified by vector or multicycle", - "PublicDescription": "" - }, - {, - "EventCode": "0x3000E", - "EventName": "PM_FXU_1PLUS_BUSY", - "BriefDescription": "At least one of the 4 FXU units is busy", - "PublicDescription": "" - }, - {, - "EventCode": "0x30028", - "EventName": "PM_CMPLU_STALL_SPEC_FINISH", - "BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC", - "PublicDescription": "" - }, - {, - "EventCode": "0x3012C", - "EventName": "PM_MRK_ST_FWD", - "BriefDescription": "Marked st forwards", - "PublicDescription": "" - }, - {, - "EventCode": "0x30130", - "EventName": "PM_MRK_INST_FIN", - "BriefDescription": "marked instruction finished", - "PublicDescription": "" - }, - {, - "EventCode": "0x3003A", - "EventName": "PM_CMPLU_STALL_EXCEPTION", - "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete", - "PublicDescription": "" - }, - {, - "EventCode": "0x3003C", - "EventName": "PM_CMPLU_STALL_NESTED_TEND", - "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay", - "PublicDescription": "" + "EventCode": "0x3C052", + "EventName": "PM_DATA_SYS_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load" }, {, "EventCode": "0x3013E", "EventName": "PM_MRK_STALL_CMPLU_CYC", - "BriefDescription": "Number of cycles the marked instruction is experiencing a stall while it is next to complete (NTC)", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C044", - "EventName": "PM_DATA_FROM_L3.1_ECO_SHR", - "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C04A", - "EventName": "PM_DATA_FROM_RMEM", - "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x34040", - "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_LDHITST", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x34044", - "EventName": "PM_INST_FROM_L3.1_ECO_SHR", - "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x34048", - "EventName": "PM_INST_FROM_DL2L3_SHR", - "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x3404C", - "EventName": "PM_INST_FROM_DL4", - "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x35046", - "EventName": "PM_IPTEG_FROM_L2.1_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x3504E", - "EventName": "PM_DARQ0_4_6_ENTRIES", - "BriefDescription": "Cycles in which 4, 5, or 6 DARQ entries (out of 12) are in use", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E044", - "EventName": "PM_DPTEG_FROM_L3.1_ECO_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F144", - "EventName": "PM_MRK_DPTEG_FROM_L3.1_ECO_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x30050", - "EventName": "PM_SYS_PUMP_CPRED", - "BriefDescription": "Initial and Final Pump Scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", - "PublicDescription": "" - }, - {, - "EventCode": "0x30052", - "EventName": "PM_SYS_PUMP_MPRED", - "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C050", - "EventName": "PM_DATA_SYS_PUMP_CPRED", - "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C052", - "EventName": "PM_DATA_SYS_PUMP_MPRED", - "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D05A", - "EventName": "PM_NTC_ISSUE_HELD_OTHER", - "BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D05C", - "EventName": "PM_DISP_HELD_HB_FULL", - "BriefDescription": "Dispatch held due to History Buffer full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E158", - "EventName": "PM_MRK_STCX_FAIL", - "BriefDescription": "marked stcx failed", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F056", - "EventName": "PM_RADIX_PWC_L3_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB but hit on the first, second, and third levels of page walk cache.", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F058", - "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3", - "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F05E", - "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L3", - "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation", - "PublicDescription": "" - }, - {, - "EventCode": "0x30064", - "EventName": "PM_DARQ_STORE_XMIT", - "BriefDescription": "The DARQ attempted to transmit a store into an LSAQ or SRQ entry. Includes rejects. Not qualified by thread, so it includes counts for the whole core", - "PublicDescription": "" - }, - {, - "EventCode": "0x30068", - "EventName": "PM_L1_ICACHE_RELOADED_PREF", - "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x301E4", - "EventName": "PM_MRK_BR_MPRED_CMPL", - "BriefDescription": "Marked Branch Mispredicted", - "PublicDescription": "" - }, - {, - "EventCode": "0x300F2", - "EventName": "PM_INST_DISP", - "BriefDescription": "# PPC Dispatched", - "PublicDescription": "" - }, - {, - "EventCode": "0x300F6", - "EventName": "PM_L1_DCACHE_RELOAD_VALID", - "BriefDescription": "DL1 reloaded due to Demand Load", - "PublicDescription": "" - }, - {, - "EventCode": "0x300FE", - "EventName": "PM_DATA_FROM_L3MISS", - "BriefDescription": "Demand LD - L3 Miss (not L2 hit and not L3 hit)", - "PublicDescription": "" - }, - {, - "EventCode": "0x4000A", - "EventName": "PM_ISQ_36_44_ENTRIES", - "BriefDescription": "Cycles in which 36 or more Issue Queue entries are in use. This is a shared event, not per thread. There are 44 issue queue entries across 4 slices in the whole core", - "PublicDescription": "" - }, - {, - "EventCode": "0x4000C", - "EventName": "PM_FREQ_UP", - "BriefDescription": "Power Management: Above Threshold A", - "PublicDescription": "" - }, - {, - "EventCode": "0x40012", - "EventName": "PM_L1_ICACHE_RELOADED_ALL", - "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D01A", - "EventName": "PM_CMPLU_STALL_EIEIO", - "BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E014", - "EventName": "PM_TM_TX_PASS_RUN_INST", - "BriefDescription": "Run instructions spent in successful transactions", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E018", - "EventName": "PM_CMPLU_STALL_NTC_DISP_FIN", - "BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch.", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C122", - "EventName": "PM_DARQ1_0_3_ENTRIES", - "BriefDescription": "Cycles in which 3 or fewer DARQ1 entries (out of 12) are in use", - "PublicDescription": "" - }, - {, - "EventCode": "0x40134", - "EventName": "PM_MRK_INST_TIMEO", - "BriefDescription": "marked Instruction finish timeout (instruction lost)", - "PublicDescription": "" - }, - {, - "EventCode": "0x4003C", - "EventName": "PM_DISP_HELD_SYNC_HOLD", - "BriefDescription": "Cycles in which dispatch is held because of a synchronizing instruction in the pipeline", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C04A", - "EventName": "PM_DATA_FROM_OFF_CHIP_CACHE", - "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C04E", - "EventName": "PM_DATA_FROM_L3MISS_MOD", - "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x44040", - "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_OTHER", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x44048", - "EventName": "PM_INST_FROM_DL2L3_MOD", - "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x4404C", - "EventName": "PM_INST_FROM_DMEM", - "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x4404E", - "EventName": "PM_INST_FROM_L3MISS_MOD", - "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D142", - "EventName": "PM_MRK_DATA_FROM_L3", - "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D04A", - "EventName": "PM_DARQ0_0_3_ENTRIES", - "BriefDescription": "Cycles in which 3 or less DARQ entries (out of 12) are in use", - "PublicDescription": "" - }, - {, - "EventCode": "0x45042", - "EventName": "PM_IPTEG_FROM_L3", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x45046", - "EventName": "PM_IPTEG_FROM_L2.1_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x4F144", - "EventName": "PM_MRK_DPTEG_FROM_L3.1_ECO_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x4F14E", - "EventName": "PM_MRK_DPTEG_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C050", - "EventName": "PM_DATA_SYS_PUMP_MPRED_RTY", - "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C052", - "EventName": "PM_DATA_PUMP_MPRED", - "BriefDescription": "Pump misprediction. Counts across all types of pumps for a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4405E", - "EventName": "PM_DARQ_STORE_REJECT", - "BriefDescription": "The DARQ attempted to transmit a store into an LSAQ or SRQ entry but It was rejected. Divide by PM_DARQ_STORE_XMIT to get reject ratio", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D058", - "EventName": "PM_VECTOR_FLOP_CMPL", - "BriefDescription": "Vector FP instruction completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D05A", - "EventName": "PM_NON_MATH_FLOP_CMPL", - "BriefDescription": "Non FLOP operation completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x4505A", - "EventName": "PM_SP_FLOP_CMPL", - "BriefDescription": "SP instruction completed", - "PublicDescription": "" + "BriefDescription": "Number of cycles the marked instruction is experiencing a stall while it is next to complete (NTC)" }, {, "EventCode": "0x4F056", "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3MISS", - "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from beyond the core's L3 data cache. The source could be local/remote/distant memory or another core's cache", - "PublicDescription": "" + "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from beyond the core's L3 data cache. The source could be local/remote/distant memory or another core's cache" }, {, - "EventCode": "0x4F058", - "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L3", - "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation", - "PublicDescription": "" + "EventCode": "0x24158", + "EventName": "PM_MRK_INST", + "BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens" + }, + {, + "EventCode": "0x1E046", + "EventName": "PM_DPTEG_FROM_L31_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x3C04A", + "EventName": "PM_DATA_FROM_RMEM", + "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a demand load" + }, + {, + "EventCode": "0x2C01C", + "EventName": "PM_CMPLU_STALL_DMISS_REMOTE", + "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)" + }, + {, + "EventCode": "0x44040", + "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_OTHER", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x2E050", + "EventName": "PM_DARQ0_7_9_ENTRIES", + "BriefDescription": "Cycles in which 7,8, or 9 DARQ entries (out of 12) are in use" + }, + {, + "EventCode": "0x2D02E", + "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L2", + "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L2 data cache. This implies that a level 4 PWC access was not necessary for this translation" + }, + {, + "EventCode": "0x3F05E", + "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L3", + "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation" + }, + {, + "EventCode": "0x2E01E", + "EventName": "PM_CMPLU_STALL_NTC_FLUSH", + "BriefDescription": "Completion stall due to ntc flush" + }, + {, + "EventCode": "0x1F14C", + "EventName": "PM_MRK_DPTEG_FROM_LL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x20130", + "EventName": "PM_MRK_INST_DECODED", + "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only" + }, + {, + "EventCode": "0x3F144", + "EventName": "PM_MRK_DPTEG_FROM_L31_ECO_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4D058", + "EventName": "PM_VECTOR_FLOP_CMPL", + "BriefDescription": "Vector FP instruction completed" + }, + {, + "EventCode": "0x14040", + "EventName": "PM_INST_FROM_L2_NO_CONFLICT", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x4404E", + "EventName": "PM_INST_FROM_L3MISS_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch" + }, + {, + "EventCode": "0x3003A", + "EventName": "PM_CMPLU_STALL_EXCEPTION", + "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete" + }, + {, + "EventCode": "0x4F144", + "EventName": "PM_MRK_DPTEG_FROM_L31_ECO_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x3E044", + "EventName": "PM_DPTEG_FROM_L31_ECO_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x300F6", + "EventName": "PM_L1_DCACHE_RELOAD_VALID", + "BriefDescription": "DL1 reloaded due to Demand Load" + }, + {, + "EventCode": "0x1415E", + "EventName": "PM_MRK_DATA_FROM_L3MISS_CYC", + "BriefDescription": "Duration in cycles to reload from a location other than the local core's L3 due to a marked load" + }, + {, + "EventCode": "0x1E052", + "EventName": "PM_CMPLU_STALL_SLB", + "BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB" + }, + {, + "EventCode": "0x4404C", + "EventName": "PM_INST_FROM_DMEM", + "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x3000E", + "EventName": "PM_FXU_1PLUS_BUSY", + "BriefDescription": "At least one of the 4 FXU units is busy" + }, + {, + "EventCode": "0x2C048", + "EventName": "PM_DATA_FROM_LMEM", + "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a demand load" + }, + {, + "EventCode": "0x3000A", + "EventName": "PM_CMPLU_STALL_PM", + "BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish. Includes permute and decimal fixed point instructions (128 bit BCD arithmetic) + a few 128 bit fixpoint add/subtract instructions with carry. Not qualified by vector or multicycle" + }, + {, + "EventCode": "0x1504E", + "EventName": "PM_IPTEG_FROM_L2MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request" + }, + {, + "EventCode": "0x1C052", + "EventName": "PM_DATA_GRP_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load" + }, + {, + "EventCode": "0x30008", + "EventName": "PM_DISP_STARVED", + "BriefDescription": "Dispatched Starved" + }, + {, + "EventCode": "0x14042", + "EventName": "PM_INST_FROM_L2", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x4000C", + "EventName": "PM_FREQ_UP", + "BriefDescription": "Power Management: Above Threshold A" + }, + {, + "EventCode": "0x3C050", + "EventName": "PM_DATA_SYS_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for a demand load" + }, + {, + "EventCode": "0x25040", + "EventName": "PM_IPTEG_FROM_L2_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a instruction side request" + }, + {, + "EventCode": "0x10132", + "EventName": "PM_MRK_INST_ISSUED", + "BriefDescription": "Marked instruction issued" + }, + {, + "EventCode": "0x1C046", + "EventName": "PM_DATA_FROM_L31_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a demand load" + }, + {, + "EventCode": "0x2C044", + "EventName": "PM_DATA_FROM_L31_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a demand load" + }, + {, + "EventCode": "0x2C04A", + "EventName": "PM_DATA_FROM_RL4", + "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a demand load" + }, + {, + "EventCode": "0x24044", + "EventName": "PM_INST_FROM_L31_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x4C050", + "EventName": "PM_DATA_SYS_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for a demand load" + }, + {, + "EventCode": "0x2C052", + "EventName": "PM_DATA_GRP_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for a demand load" + }, + {, + "EventCode": "0x2F148", + "EventName": "PM_MRK_DPTEG_FROM_LMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4D01A", + "EventName": "PM_CMPLU_STALL_EIEIO", + "BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2" + }, + {, + "EventCode": "0x4F14E", + "EventName": "PM_MRK_DPTEG_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, {, "EventCode": "0x4F05A", "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L3", - "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L3 data cache. This is the deepest level of PWC possible for a translation", - "PublicDescription": "" + "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L3 data cache. This is the deepest level of PWC possible for a translation" }, {, - "EventCode": "0x4F05E", - "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from beyond the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation. The source could be local/remote/distant memory or another core's cache", - "PublicDescription": "" + "EventCode": "0x1F05A", + "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L2", + "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L2 data cache. This is the deepest level of PWC possible for a translation" }, {, - "EventCode": "0x401E0", - "EventName": "PM_MRK_INST_CMPL", - "BriefDescription": "marked instruction completed", - "PublicDescription": "" + "EventCode": "0x30068", + "EventName": "PM_L1_ICACHE_RELOADED_PREF", + "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)" }, {, - "EventCode": "0x400F4", - "EventName": "PM_RUN_PURR", - "BriefDescription": "Run_PURR", - "PublicDescription": "" - }, - {, - "EventCode": "0x400FC", - "EventName": "PM_ITLB_MISS", - "BriefDescription": "ITLB Reloaded. Counts 1 per ITLB miss for HPT but multiple for radix depending on number of levels traveresed", - "PublicDescription": "" + "EventCode": "0x4C04A", + "EventName": "PM_DATA_FROM_OFF_CHIP_CACHE", + "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a demand load" }, {, "EventCode": "0x400FE", "EventName": "PM_DATA_FROM_MEMORY", - "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load", - "PublicDescription": "" + "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load" + }, + {, + "EventCode": "0x3F058", + "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3", + "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache" + }, + {, + "EventCode": "0x4D142", + "EventName": "PM_MRK_DATA_FROM_L3", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a marked load" + }, + {, + "EventCode": "0x30050", + "EventName": "PM_SYS_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)" + }, + {, + "EventCode": "0x30028", + "EventName": "PM_CMPLU_STALL_SPEC_FINISH", + "BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC" + }, + {, + "EventCode": "0x400F4", + "EventName": "PM_RUN_PURR", + "BriefDescription": "Run_PURR" + }, + {, + "EventCode": "0x3404C", + "EventName": "PM_INST_FROM_DL4", + "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x3D05A", + "EventName": "PM_NTC_ISSUE_HELD_OTHER", + "BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU" + }, + {, + "EventCode": "0x2E048", + "EventName": "PM_DPTEG_FROM_LMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x2D02A", + "EventName": "PM_RADIX_PWC_L3_PDE_FROM_L2", + "BriefDescription": "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L2 data cache" + }, + {, + "EventCode": "0x1F05C", + "EventName": "PM_RADIX_PWC_L3_PDE_FROM_L3", + "BriefDescription": "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L3 data cache" + }, + {, + "EventCode": "0x4D04A", + "EventName": "PM_DARQ0_0_3_ENTRIES", + "BriefDescription": "Cycles in which 3 or less DARQ entries (out of 12) are in use" + }, + {, + "EventCode": "0x1404C", + "EventName": "PM_INST_FROM_LL4", + "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x200FD", + "EventName": "PM_L1_ICACHE_MISS", + "BriefDescription": "Demand iCache Miss" + }, + {, + "EventCode": "0x34040", + "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_LDHITST", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x20138", + "EventName": "PM_MRK_ST_NEST", + "BriefDescription": "Marked store sent to nest" + }, + {, + "EventCode": "0x44048", + "EventName": "PM_INST_FROM_DL2L3_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x35046", + "EventName": "PM_IPTEG_FROM_L21_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a instruction side request" + }, + {, + "EventCode": "0x4C04E", + "EventName": "PM_DATA_FROM_L3MISS_MOD", + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load" + }, + {, + "EventCode": "0x401E0", + "EventName": "PM_MRK_INST_CMPL", + "BriefDescription": "marked instruction completed" + }, + {, + "EventCode": "0x2C128", + "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC", + "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load" + }, + {, + "EventCode": "0x34044", + "EventName": "PM_INST_FROM_L31_ECO_SHR", + "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x4E018", + "EventName": "PM_CMPLU_STALL_NTC_DISP_FIN", + "BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch." + }, + {, + "EventCode": "0x2E05E", + "EventName": "PM_LMQ_EMPTY_CYC", + "BriefDescription": "Cycles in which the LMQ has no pending load misses for this thread" + }, + {, + "EventCode": "0x4C122", + "EventName": "PM_DARQ1_0_3_ENTRIES", + "BriefDescription": "Cycles in which 3 or fewer DARQ1 entries (out of 12) are in use" + }, + {, + "EventCode": "0x4F058", + "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L3", + "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation" + }, + {, + "EventCode": "0x14046", + "EventName": "PM_INST_FROM_L31_SHR", + "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x3012C", + "EventName": "PM_MRK_ST_FWD", + "BriefDescription": "Marked st forwards" + }, + {, + "EventCode": "0x101E0", + "EventName": "PM_MRK_INST_DISP", + "BriefDescription": "The thread has dispatched a randomly sampled marked instruction" + }, + {, + "EventCode": "0x1D058", + "EventName": "PM_DARQ0_10_12_ENTRIES", + "BriefDescription": "Cycles in which 10 or more DARQ entries (out of 12) are in use" + }, + {, + "EventCode": "0x300FE", + "EventName": "PM_DATA_FROM_L3MISS", + "BriefDescription": "Demand LD - L3 Miss (not L2 hit and not L3 hit)" + }, + {, + "EventCode": "0x30006", + "EventName": "PM_CMPLU_STALL_OTHER_CMPL", + "BriefDescription": "Instructions the core completed while this tread was stalled" + }, + {, + "EventCode": "0x1005C", + "EventName": "PM_CMPLU_STALL_DP", + "BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by NOT vector" + }, + {, + "EventCode": "0x1E042", + "EventName": "PM_DPTEG_FROM_L2", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x1016E", + "EventName": "PM_MRK_BR_CMPL", + "BriefDescription": "Branch Instruction completed" + }, + {, + "EventCode": "0x2013A", + "EventName": "PM_MRK_BRU_FIN", + "BriefDescription": "bru marked instr finish" + }, + {, + "EventCode": "0x4F05E", + "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from beyond the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation. The source could be local/remote/distant memory or another core's cache" + }, + {, + "EventCode": "0x400FC", + "EventName": "PM_ITLB_MISS", + "BriefDescription": "ITLB Reloaded. Counts 1 per ITLB miss for HPT but multiple for radix depending on number of levels traveresed" + }, + {, + "EventCode": "0x2D024", + "EventName": "PM_RADIX_PWC_L2_HIT", + "BriefDescription": "A radix translation attempt missed in the TLB but hit on both the first and second levels of page walk cache." + }, + {, + "EventCode": "0x3F056", + "EventName": "PM_RADIX_PWC_L3_HIT", + "BriefDescription": "A radix translation attempt missed in the TLB but hit on the first, second, and third levels of page walk cache." + }, + {, + "EventCode": "0x4E014", + "EventName": "PM_TM_TX_PASS_RUN_INST", + "BriefDescription": "Run instructions spent in successful transactions" + }, + {, + "EventCode": "0x1E044", + "EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4D05A", + "EventName": "PM_NON_MATH_FLOP_CMPL", + "BriefDescription": "Non FLOP operation completed" + }, + {, + "EventCode": "0x101E2", + "EventName": "PM_MRK_BR_TAKEN_CMPL", + "BriefDescription": "Marked Branch Taken completed" + }, + {, + "EventCode": "0x3E158", + "EventName": "PM_MRK_STCX_FAIL", + "BriefDescription": "marked stcx failed" + }, + {, + "EventCode": "0x1C048", + "EventName": "PM_DATA_FROM_ON_CHIP_CACHE", + "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a demand load" + }, + {, + "EventCode": "0x1C054", + "EventName": "PM_DATA_PUMP_CPRED", + "BriefDescription": "Pump prediction correct. Counts across all types of pumps for a demand load" + }, + {, + "EventCode": "0x4405E", + "EventName": "PM_DARQ_STORE_REJECT", + "BriefDescription": "The DARQ attempted to transmit a store into an LSAQ or SRQ entry but It was rejected. Divide by PM_DARQ_STORE_XMIT to get reject ratio" + }, + {, + "EventCode": "0x1C042", + "EventName": "PM_DATA_FROM_L2", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a demand load" + }, + {, + "EventCode": "0x1D14C", + "EventName": "PM_MRK_DATA_FROM_LL4", + "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a marked load" + }, + {, + "EventCode": "0x1006C", + "EventName": "PM_RUN_CYC_ST_MODE", + "BriefDescription": "Cycles run latch is set and core is in ST mode" + }, + {, + "EventCode": "0x3C044", + "EventName": "PM_DATA_FROM_L31_ECO_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a demand load" + }, + {, + "EventCode": "0x4C052", + "EventName": "PM_DATA_PUMP_MPRED", + "BriefDescription": "Pump misprediction. Counts across all types of pumps for a demand load" + }, + {, + "EventCode": "0x20050", + "EventName": "PM_GRP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)" + }, + {, + "EventCode": "0x1F150", + "EventName": "PM_MRK_ST_L2DISP_TO_CMPL_CYC", + "BriefDescription": "cycles from L2 rc disp to l2 rc completion" + }, + {, + "EventCode": "0x4505A", + "EventName": "PM_SP_FLOP_CMPL", + "BriefDescription": "SP instruction completed" + }, + {, + "EventCode": "0x4000A", + "EventName": "PM_ISQ_36_44_ENTRIES", + "BriefDescription": "Cycles in which 36 or more Issue Queue entries are in use. This is a shared event, not per thread. There are 44 issue queue entries across 4 slices in the whole core" + }, + {, + "EventCode": "0x2C12E", + "EventName": "PM_MRK_DATA_FROM_LL4_CYC", + "BriefDescription": "Duration in cycles to reload from the local chip's L4 cache due to a marked load" + }, + {, + "EventCode": "0x2C058", + "EventName": "PM_MEM_PREF", + "BriefDescription": "Memory prefetch for this thread. Includes L4" + }, + {, + "EventCode": "0x40012", + "EventName": "PM_L1_ICACHE_RELOADED_ALL", + "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch" + }, + {, + "EventCode": "0x4003C", + "EventName": "PM_DISP_HELD_SYNC_HOLD", + "BriefDescription": "Cycles in which dispatch is held because of a synchronizing instruction in the pipeline" + }, + {, + "EventCode": "0x3003C", + "EventName": "PM_CMPLU_STALL_NESTED_TEND", + "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay" + }, + {, + "EventCode": "0x3D05C", + "EventName": "PM_DISP_HELD_HB_FULL", + "BriefDescription": "Dispatch held due to History Buffer full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)" + }, + {, + "EventCode": "0x30052", + "EventName": "PM_SYS_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)" + }, + {, + "EventCode": "0x2E044", + "EventName": "PM_DPTEG_FROM_L31_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x34048", + "EventName": "PM_INST_FROM_DL2L3_SHR", + "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x45042", + "EventName": "PM_IPTEG_FROM_L3", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a instruction side request" + }, + {, + "EventCode": "0x15042", + "EventName": "PM_IPTEG_FROM_L2", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a instruction side request" + }, + {, + "EventCode": "0x1C05E", + "EventName": "PM_MEM_LOC_THRESH_LSU_MED", + "BriefDescription": "Local memory above threshold for data prefetch" + }, + {, + "EventCode": "0x40134", + "EventName": "PM_MRK_INST_TIMEO", + "BriefDescription": "marked Instruction finish timeout (instruction lost)" + }, + {, + "EventCode": "0x1002C", + "EventName": "PM_L1_DCACHE_RELOADED_ALL", + "BriefDescription": "L1 data cache reloaded for demand. If MMCR1[16] is 1, prefetches will be included as well" + }, + {, + "EventCode": "0x30130", + "EventName": "PM_MRK_INST_FIN", + "BriefDescription": "marked instruction finished" + }, + {, + "EventCode": "0x1F14A", + "EventName": "PM_MRK_DPTEG_FROM_RL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x3504E", + "EventName": "PM_DARQ0_4_6_ENTRIES", + "BriefDescription": "Cycles in which 4, 5, or 6 DARQ entries (out of 12) are in use" + }, + {, + "EventCode": "0x30064", + "EventName": "PM_DARQ_STORE_XMIT", + "BriefDescription": "The DARQ attempted to transmit a store into an LSAQ or SRQ entry. Includes rejects. Not qualified by thread, so it includes counts for the whole core" + }, + {, + "EventCode": "0x45046", + "EventName": "PM_IPTEG_FROM_L21_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a instruction side request" + }, + {, + "EventCode": "0x2C016", + "EventName": "PM_CMPLU_STALL_PASTE", + "BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2" + }, + {, + "EventCode": "0x24156", + "EventName": "PM_MRK_STCX_FIN", + "BriefDescription": "Number of marked stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed" + }, + {, + "EventCode": "0x15150", + "EventName": "PM_SYNC_MRK_PROBE_NOP", + "BriefDescription": "Marked probeNops which can cause synchronous interrupts" + }, + {, + "EventCode": "0x301E4", + "EventName": "PM_MRK_BR_MPRED_CMPL", + "BriefDescription": "Marked Branch Mispredicted" } ] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/memory.json b/tools/perf/pmu-events/arch/powerpc/power9/memory.json index e48708c10222..9960d1c0dd44 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/memory.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/memory.json @@ -1,158 +1,132 @@ [ {, - "EventCode": "0x10008", - "EventName": "PM_RUN_SPURR", - "BriefDescription": "Run SPURR", - "PublicDescription": "" - }, - {, - "EventCode": "0x1000A", - "EventName": "PM_PMC3_REWIND", - "BriefDescription": "PMC3 rewind event. A rewind happens when a speculative event (such as latency or CPI stack) is selected on PMC3 and the stall reason or reload source did not match the one programmed in PMC3. When this occurs, the count in PMC3 will not change.", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C040", - "EventName": "PM_DATA_FROM_L2_NO_CONFLICT", - "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C050", - "EventName": "PM_DATA_CHIP_PUMP_CPRED", - "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D15E", - "EventName": "PM_MRK_RUN_CYC", - "BriefDescription": "Run cycles in which a marked instruction is in the pipeline", - "PublicDescription": "" - }, - {, - "EventCode": "0x15158", - "EventName": "PM_SYNC_MRK_L2HIT", - "BriefDescription": "Marked L2 Hits that can throw a synchronous interrupt", - "PublicDescription": "" + "EventCode": "0x3006E", + "EventName": "PM_NEST_REF_CLK", + "BriefDescription": "Multiply by 4 to obtain the number of PB cycles" }, {, "EventCode": "0x20010", "EventName": "PM_PMC1_OVERFLOW", - "BriefDescription": "Overflow from counter 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C040", - "EventName": "PM_DATA_FROM_L2_MEPF", - "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a demand load", - "PublicDescription": "" + "BriefDescription": "Overflow from counter 1" }, {, "EventCode": "0x2005A", "EventName": "PM_DARQ1_7_9_ENTRIES", - "BriefDescription": "Cycles in which 7 to 9 DARQ1 entries (out of 12) are in use", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C05C", - "EventName": "PM_INST_GRP_PUMP_CPRED", - "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for an instruction fetch (demand only)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D156", - "EventName": "PM_MRK_DTLB_MISS_4K", - "BriefDescription": "Marked Data TLB Miss page size 4k", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E05A", - "EventName": "PM_LRQ_REJECT", - "BriefDescription": "Internal LSU reject from LRQ. Rejects cause the load to go back to LRQ, but it stays contained within the LSU once it gets issued. This event counts the number of times the LRQ attempts to relaunch an instruction after a reject. Any load can suffer multiple rejects", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E05C", - "EventName": "PM_LSU_REJECT_ERAT_MISS", - "BriefDescription": "LSU Reject due to ERAT (up to 4 per cycles)", - "PublicDescription": "" - }, - {, - "EventCode": "0x200F6", - "EventName": "PM_LSU_DERAT_MISS", - "BriefDescription": "DERAT Reloaded due to a DERAT miss", - "PublicDescription": "" + "BriefDescription": "Cycles in which 7 to 9 DARQ1 entries (out of 12) are in use" }, {, "EventCode": "0x3C048", "EventName": "PM_DATA_FROM_DL2L3_SHR", - "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load", - "PublicDescription": "" + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load" }, {, - "EventCode": "0x3404A", - "EventName": "PM_INST_FROM_RMEM", - "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)", - "PublicDescription": "" + "EventCode": "0x10008", + "EventName": "PM_RUN_SPURR", + "BriefDescription": "Run SPURR" }, {, - "EventCode": "0x3C058", - "EventName": "PM_LARX_FIN", - "BriefDescription": "Larx finished", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E050", - "EventName": "PM_DARQ1_4_6_ENTRIES", - "BriefDescription": "Cycles in which 4, 5, or 6 DARQ1 entries (out of 12) are in use", - "PublicDescription": "" - }, - {, - "EventCode": "0x3006E", - "EventName": "PM_NEST_REF_CLK", - "BriefDescription": "Multiply by 4 to obtain the number of PB cycles", - "PublicDescription": "" - }, - {, - "EventCode": "0x301E2", - "EventName": "PM_MRK_ST_CMPL", - "BriefDescription": "Marked store completed and sent to nest", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D02C", - "EventName": "PM_PMC1_REWIND", - "BriefDescription": "", - "PublicDescription": "" - }, - {, - "EventCode": "0x4003E", - "EventName": "PM_LD_CMPL", - "BriefDescription": "count of Loads completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C040", - "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER", - "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C042", - "EventName": "PM_DATA_FROM_L3", - "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a demand load", - "PublicDescription": "" + "EventCode": "0x200F6", + "EventName": "PM_LSU_DERAT_MISS", + "BriefDescription": "DERAT Reloaded due to a DERAT miss" }, {, "EventCode": "0x4C048", "EventName": "PM_DATA_FROM_DL2L3_MOD", - "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load", - "PublicDescription": "" + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load" + }, + {, + "EventCode": "0x1D15E", + "EventName": "PM_MRK_RUN_CYC", + "BriefDescription": "Run cycles in which a marked instruction is in the pipeline" + }, + {, + "EventCode": "0x4003E", + "EventName": "PM_LD_CMPL", + "BriefDescription": "count of Loads completed" + }, + {, + "EventCode": "0x2D156", + "EventName": "PM_MRK_DTLB_MISS_4K", + "BriefDescription": "Marked Data TLB Miss page size 4k" + }, + {, + "EventCode": "0x4C042", + "EventName": "PM_DATA_FROM_L3", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a demand load" + }, + {, + "EventCode": "0x4D02C", + "EventName": "PM_PMC1_REWIND", + "BriefDescription": "" + }, + {, + "EventCode": "0x15158", + "EventName": "PM_SYNC_MRK_L2HIT", + "BriefDescription": "Marked L2 Hits that can throw a synchronous interrupt" + }, + {, + "EventCode": "0x3404A", + "EventName": "PM_INST_FROM_RMEM", + "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x301E2", + "EventName": "PM_MRK_ST_CMPL", + "BriefDescription": "Marked store completed and sent to nest" + }, + {, + "EventCode": "0x1C050", + "EventName": "PM_DATA_CHIP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for a demand load" + }, + {, + "EventCode": "0x4C040", + "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a demand load" + }, + {, + "EventCode": "0x2E05C", + "EventName": "PM_LSU_REJECT_ERAT_MISS", + "BriefDescription": "LSU Reject due to ERAT (up to 4 per cycles)" + }, + {, + "EventCode": "0x1000A", + "EventName": "PM_PMC3_REWIND", + "BriefDescription": "PMC3 rewind event. A rewind happens when a speculative event (such as latency or CPI stack) is selected on PMC3 and the stall reason or reload source did not match the one programmed in PMC3. When this occurs, the count in PMC3 will not change." + }, + {, + "EventCode": "0x3C058", + "EventName": "PM_LARX_FIN", + "BriefDescription": "Larx finished" + }, + {, + "EventCode": "0x1C040", + "EventName": "PM_DATA_FROM_L2_NO_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a demand load" + }, + {, + "EventCode": "0x2C040", + "EventName": "PM_DATA_FROM_L2_MEPF", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a demand load" + }, + {, + "EventCode": "0x2E05A", + "EventName": "PM_LRQ_REJECT", + "BriefDescription": "Internal LSU reject from LRQ. Rejects cause the load to go back to LRQ, but it stays contained within the LSU once it gets issued. This event counts the number of times the LRQ attempts to relaunch an instruction after a reject. Any load can suffer multiple rejects" + }, + {, + "EventCode": "0x2C05C", + "EventName": "PM_INST_GRP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for an instruction fetch (demand only)" }, {, "EventCode": "0x4D056", "EventName": "PM_NON_FMA_FLOP_CMPL", - "BriefDescription": "Non FMA instruction completed", - "PublicDescription": "" + "BriefDescription": "Non FMA instruction completed" + }, + {, + "EventCode": "0x3E050", + "EventName": "PM_DARQ1_4_6_ENTRIES", + "BriefDescription": "Cycles in which 4, 5, or 6 DARQ1 entries (out of 12) are in use" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json index 396e6e061d91..00f3d2a21f31 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json @@ -1,836 +1,2512 @@ [ {, - "EventCode": "0x1001C", - "EventName": "PM_CMPLU_STALL_THRD", - "BriefDescription": "Completion Stalled because the thread was blocked", - "PublicDescription": "" + "EventCode": "0x3084", + "EventName": "PM_ISU1_ISS_HOLD_ALL", + "BriefDescription": "All ISU rejects" }, {, - "EventCode": "0x1002E", - "EventName": "PM_LMQ_MERGE", - "BriefDescription": "A demand miss collides with a prefetch for the same line", - "PublicDescription": "" + "EventCode": "0xF880", + "EventName": "PM_SNOOP_TLBIE", + "BriefDescription": "TLBIE snoop" }, {, - "EventCode": "0x10134", - "EventName": "PM_MRK_ST_DONE_L2", - "BriefDescription": "marked store completed in L2 ( RC machine done)", - "PublicDescription": "" + "EventCode": "0x4088", + "EventName": "PM_IC_DEMAND_REQ", + "BriefDescription": "Demand Instruction fetch request" }, {, - "EventCode": "0x10138", - "EventName": "PM_MRK_BR_2PATH", - "BriefDescription": "marked branches which are not strongly biased", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C04A", - "EventName": "PM_DATA_FROM_RL2L3_SHR", - "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C04C", - "EventName": "PM_DATA_FROM_LL4", - "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D140", - "EventName": "PM_MRK_DATA_FROM_L3.1_MOD_CYC", - "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L3 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D144", - "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT", - "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D146", - "EventName": "PM_MRK_DATA_FROM_MEMORY_CYC", - "BriefDescription": "Duration in cycles to reload from a memory location including L4 from local remote or distant due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D148", - "EventName": "PM_MRK_DATA_FROM_RMEM", - "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D14E", - "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC", - "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x15040", - "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x1504C", - "EventName": "PM_IPTEG_FROM_LL4", - "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E048", - "EventName": "PM_DPTEG_FROM_ON_CHIP_CACHE", - "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E04E", - "EventName": "PM_DPTEG_FROM_L2MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F146", - "EventName": "PM_MRK_DPTEG_FROM_L3.1_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x10052", - "EventName": "PM_GRP_PUMP_MPRED_RTY", - "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C05C", - "EventName": "PM_DTLB_MISS_2M", - "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used", - "PublicDescription": "" - }, - {, - "EventCode": "0x14156", - "EventName": "PM_MRK_DATA_FROM_L2_CYC", - "BriefDescription": "Duration in cycles to reload from local core's L2 due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x14158", - "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC", - "BriefDescription": "Duration in cycles to reload from local core's L2 without conflict due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1415C", - "EventName": "PM_MRK_DATA_FROM_L3_MEPF_CYC", - "BriefDescription": "Duration in cycles to reload from local core's L3 without dispatch conflicts hit on Mepf state due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D150", - "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR", - "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D152", - "EventName": "PM_MRK_DATA_FROM_DL4", - "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D156", - "EventName": "PM_MRK_LD_MISS_L1_CYC", - "BriefDescription": "Marked ld latency", - "PublicDescription": "" - }, - {, - "EventCode": "0x15154", - "EventName": "PM_SYNC_MRK_L3MISS", - "BriefDescription": "Marked L3 misses that can throw a synchronous interrupt", - "PublicDescription": "" - }, - {, - "EventCode": "0x1515A", - "EventName": "PM_SYNC_MRK_L2MISS", - "BriefDescription": "Marked L2 Miss that can throw a synchronous interrupt", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E05A", - "EventName": "PM_CMPLU_STALL_ANY_SYNC", - "BriefDescription": "Cycles in which the NTC sync instruction (isync, lwsync or hwsync) is not allowed to complete ", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E05C", - "EventName": "PM_CMPLU_STALL_NESTED_TBEGIN", - "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F152", - "EventName": "PM_MRK_FAB_RSP_BKILL_CYC", - "BriefDescription": "cycles L2 RC took for a bkill", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F056", - "EventName": "PM_RADIX_PWC_L1_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB and only the first level page walk cache was a hit.", - "PublicDescription": "" - }, - {, - "EventCode": "0x101E4", - "EventName": "PM_MRK_L1_ICACHE_MISS", - "BriefDescription": "sampled Instruction suffered an icache Miss", - "PublicDescription": "" - }, - {, - "EventCode": "0x101EA", - "EventName": "PM_MRK_L1_RELOAD_VALID", - "BriefDescription": "Marked demand reload", - "PublicDescription": "" - }, - {, - "EventCode": "0x100FA", - "EventName": "PM_ANY_THRD_RUN_CYC", - "BriefDescription": "Cycles in which at least one thread has the run latch set", - "PublicDescription": "" - }, - {, - "EventCode": "0x100FC", - "EventName": "PM_LD_REF_L1", - "BriefDescription": "All L1 D cache load references counted at finish, gated by reject", - "PublicDescription": "" - }, - {, - "EventCode": "0x20006", - "EventName": "PM_DISP_HELD_ISSQ_FULL", - "BriefDescription": "Dispatch held due to Issue q full. Includes issue queue and branch queue", - "PublicDescription": "" - }, - {, - "EventCode": "0x2000C", - "EventName": "PM_THRD_ALL_RUN_CYC", - "BriefDescription": "Cycles in which all the threads have the run latch set", - "PublicDescription": "" - }, - {, - "EventCode": "0x2001A", - "EventName": "PM_NTC_ALL_FIN", - "BriefDescription": "Cycles after all instructions have finished to group completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D014", - "EventName": "PM_CMPLU_STALL_LRQ_FULL", - "BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ (load-store address queue) because the LRQ (load-reorder queue) was full", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D018", - "EventName": "PM_CMPLU_STALL_EXEC_UNIT", - "BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D01E", - "EventName": "PM_ICT_NOSLOT_DISP_HELD_ISSQ", - "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E014", - "EventName": "PM_STCX_FIN", - "BriefDescription": "Number of stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C120", - "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT", - "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C122", - "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC", - "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C126", - "EventName": "PM_MRK_DATA_FROM_L2", - "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C12A", - "EventName": "PM_MRK_DATA_FROM_RMEM_CYC", - "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group ( Remote) due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C12C", - "EventName": "PM_MRK_DATA_FROM_DL4_CYC", - "BriefDescription": "Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D120", - "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE", - "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D026", - "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L2", - "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L2 data cache", - "PublicDescription": "" - }, - {, - "EventCode": "0x20132", - "EventName": "PM_MRK_DFU_FIN", - "BriefDescription": "Decimal Unit marked Instruction Finish", - "PublicDescription": "" - }, - {, - "EventCode": "0x20134", - "EventName": "PM_MRK_FXU_FIN", - "BriefDescription": "fxu marked instr finish", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C04E", - "EventName": "PM_LD_MISS_L1_FIN", - "BriefDescription": "Number of load instructions that finished with an L1 miss. Note that even if a load spans multiple slices this event will increment only once per load op.", - "PublicDescription": "" - }, - {, - "EventCode": "0x24040", - "EventName": "PM_INST_FROM_L2_MEPF", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x24048", - "EventName": "PM_INST_FROM_LMEM", - "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D142", - "EventName": "PM_MRK_DATA_FROM_L3_MEPF", - "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D144", - "EventName": "PM_MRK_DATA_FROM_L3.1_MOD", - "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D148", - "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST", - "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x25048", - "EventName": "PM_IPTEG_FROM_LMEM", - "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E040", - "EventName": "PM_DPTEG_FROM_L2_MEPF", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E04A", - "EventName": "PM_DPTEG_FROM_RL4", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2F14A", - "EventName": "PM_MRK_DPTEG_FROM_RL4", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x20054", - "EventName": "PM_L1_PREF", - "BriefDescription": "A data line was written to the L1 due to a hardware or software prefetch", - "PublicDescription": "" - }, - {, - "EventCode": "0x20056", - "EventName": "PM_TAKEN_BR_MPRED_CMPL", - "BriefDescription": "Total number of taken branches that were incorrectly predicted as not-taken. This event counts branches completed and does not include speculative instructions", - "PublicDescription": "" - }, - {, - "EventCode": "0x20058", - "EventName": "PM_DARQ1_10_12_ENTRIES", - "BriefDescription": "Cycles in which 10 or more DARQ1 entries (out of 12) are in use", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C050", - "EventName": "PM_DATA_GRP_PUMP_CPRED", - "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C05E", - "EventName": "PM_INST_GRP_PUMP_MPRED", - "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for an instruction fetch (demand only)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2505C", - "EventName": "PM_VSU_FIN", - "BriefDescription": "VSU instruction finished. Up to 4 per cycle", - "PublicDescription": "" - }, - {, - "EventCode": "0x2505E", - "EventName": "PM_BACK_BR_CMPL", - "BriefDescription": "Branch instruction completed with a target address less than current instruction address", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E052", - "EventName": "PM_TM_PASSED", - "BriefDescription": "Number of TM transactions that passed", - "PublicDescription": "" - }, - {, - "EventCode": "0x20064", - "EventName": "PM_IERAT_RELOAD_4K", - "BriefDescription": "IERAT reloaded (after a miss) for 4K pages", - "PublicDescription": "" - }, - {, - "EventCode": "0x2006C", - "EventName": "PM_RUN_CYC_SMT4_MODE", - "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT4 mode", - "PublicDescription": "" - }, - {, - "EventCode": "0x201E0", - "EventName": "PM_MRK_DATA_FROM_MEMORY", - "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x201E4", - "EventName": "PM_MRK_DATA_FROM_L3MISS", - "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x201E8", - "EventName": "PM_THRESH_EXC_512", - "BriefDescription": "Threshold counter exceeded a value of 512", - "PublicDescription": "" - }, - {, - "EventCode": "0x200F2", - "EventName": "PM_INST_DISP", - "BriefDescription": "# PPC Dispatched", - "PublicDescription": "" - }, - {, - "EventCode": "0x30016", - "EventName": "PM_CMPLU_STALL_SRQ_FULL", - "BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full", - "PublicDescription": "" - }, - {, - "EventCode": "0x30018", - "EventName": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL", - "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)", - "PublicDescription": "" - }, - {, - "EventCode": "0x3001A", - "EventName": "PM_DATA_TABLEWALK_CYC", - "BriefDescription": "Data Tablewalk Cycles. Could be 1 or 2 active tablewalks. Includes data prefetches.", - "PublicDescription": "" - }, - {, - "EventCode": "0x30132", - "EventName": "PM_MRK_VSU_FIN", - "BriefDescription": "VSU marked instr finish", - "PublicDescription": "" - }, - {, - "EventCode": "0x30134", - "EventName": "PM_MRK_ST_CMPL_INT", - "BriefDescription": "marked store finished with intervention", - "PublicDescription": "" - }, - {, - "EventCode": "0x30038", - "EventName": "PM_CMPLU_STALL_DMISS_LMEM", - "BriefDescription": "Completion stall due to cache miss that resolves in local memory", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C040", - "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST", - "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C042", - "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT", - "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D140", - "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC", - "BriefDescription": "Duration in cycles to reload from local core's L2 with dispatch conflict due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D144", - "EventName": "PM_MRK_DATA_FROM_L2_MEPF_CYC", - "BriefDescription": "Duration in cycles to reload from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D146", - "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT", - "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D14C", - "EventName": "PM_MRK_DATA_FROM_DMEM", - "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D14E", - "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD", - "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x35042", - "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x35048", - "EventName": "PM_IPTEG_FROM_DL2L3_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x3504C", - "EventName": "PM_IPTEG_FROM_DL4", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F146", - "EventName": "PM_MRK_DPTEG_FROM_L2.1_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x3005A", - "EventName": "PM_ISQ_0_8_ENTRIES", - "BriefDescription": "Cycles in which 8 or less Issue Queue entries are in use. This is a shared event, not per thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x3005C", - "EventName": "PM_BFU_BUSY", - "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C05E", - "EventName": "PM_MEM_RWITM", - "BriefDescription": "Memory Read With Intent to Modify for this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x34054", - "EventName": "PM_PARTIAL_ST_FIN", - "BriefDescription": "Any store finished by an LSU slice", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D15E", - "EventName": "PM_MULT_MRK", - "BriefDescription": "mult marked instr", - "PublicDescription": "" - }, - {, - "EventCode": "0x35152", - "EventName": "PM_MRK_DATA_FROM_L2MISS_CYC", - "BriefDescription": "Duration in cycles to reload from a location other than the local core's L2 due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x35154", - "EventName": "PM_MRK_DATA_FROM_L3_CYC", - "BriefDescription": "Duration in cycles to reload from local core's L3 due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x35156", - "EventName": "PM_MRK_DATA_FROM_L3.1_SHR_CYC", - "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x35158", - "EventName": "PM_MRK_DATA_FROM_L3.1_ECO_MOD_CYC", - "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3515E", - "EventName": "PM_MRK_BACK_BR_CMPL", - "BriefDescription": "Marked branch instruction completed with a target address less than current instruction address", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E05E", - "EventName": "PM_L3_CO_MEPF", - "BriefDescription": "L3 castouts in Mepf state for this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F150", - "EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC", - "BriefDescription": "cycles to drain st from core to L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F054", - "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from beyond the core's L3 data cache. This is the deepest level of PWC possible for a translation. The source could be local/remote/distant memory or another core's cache", - "PublicDescription": "" - }, - {, - "EventCode": "0x30162", - "EventName": "PM_MRK_LSU_DERAT_MISS", - "BriefDescription": "Marked derat reload (miss) for any page size", - "PublicDescription": "" - }, - {, - "EventCode": "0x3006A", - "EventName": "PM_IERAT_RELOAD_64K", - "BriefDescription": "IERAT Reloaded (Miss) for a 64k page", - "PublicDescription": "" - }, - {, - "EventCode": "0x300F8", - "EventName": "PM_TB_BIT_TRANS", - "BriefDescription": "timebase event", - "PublicDescription": "" - }, - {, - "EventCode": "0x40006", - "EventName": "PM_ISLB_MISS", - "BriefDescription": "Number of ISLB misses for this thread", - "PublicDescription": "" + "EventCode": "0x20A4", + "EventName": "PM_TM_TRESUME", + "BriefDescription": "TM resume instruction completed" }, {, "EventCode": "0x40008", "EventName": "PM_SRQ_EMPTY_CYC", - "BriefDescription": "Cycles in which the SRQ has at least one (out of four) empty slice", - "PublicDescription": "" + "BriefDescription": "Cycles in which the SRQ has at least one (out of four) empty slice" }, {, - "EventCode": "0x40014", - "EventName": "PM_PROBE_NOP_DISP", - "BriefDescription": "ProbeNops dispatched", - "PublicDescription": "" + "EventCode": "0x20064", + "EventName": "PM_IERAT_RELOAD_4K", + "BriefDescription": "IERAT reloaded (after a miss) for 4K pages" }, {, - "EventCode": "0x4001C", - "EventName": "PM_INST_IMC_MATCH_CMPL", - "BriefDescription": "IMC Match Count", - "PublicDescription": "" + "EventCode": "0x260B4", + "EventName": "PM_L3_P2_LCO_RTY", + "BriefDescription": "L3 initiated LCO received retry on port 2 (can try 4 times)" }, {, - "EventCode": "0x4C01A", - "EventName": "PM_CMPLU_STALL_DMISS_L3MISS", - "BriefDescription": "Completion stall due to cache miss resolving missed the L3", - "PublicDescription": "" + "EventCode": "0x20006", + "EventName": "PM_DISP_HELD_ISSQ_FULL", + "BriefDescription": "Dispatch held due to Issue q full. Includes issue queue and branch queue" }, {, - "EventCode": "0x4D012", - "EventName": "PM_PMC3_SAVED", - "BriefDescription": "PMC3 Rewind Value saved", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E11E", - "EventName": "PM_MRK_DATA_FROM_DMEM_CYC", - "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C124", - "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC", - "BriefDescription": "Duration in cycles to reload from local core's L3 without conflict due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D12E", - "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC", - "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4013A", - "EventName": "PM_MRK_IC_MISS", - "BriefDescription": "Marked instruction experienced I cache miss", - "PublicDescription": "" - }, - {, - "EventCode": "0x44044", - "EventName": "PM_INST_FROM_L3.1_ECO_MOD", - "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x44046", - "EventName": "PM_INST_FROM_L2.1_MOD", - "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x4404A", - "EventName": "PM_INST_FROM_OFF_CHIP_CACHE", - "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D144", - "EventName": "PM_MRK_DATA_FROM_L3.1_ECO_MOD", - "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D146", - "EventName": "PM_MRK_DATA_FROM_L2.1_MOD", - "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4504C", - "EventName": "PM_IPTEG_FROM_DMEM", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a instruction side request", - "PublicDescription": "" + "EventCode": "0x201E4", + "EventName": "PM_MRK_DATA_FROM_L3MISS", + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load" }, {, "EventCode": "0x4E044", - "EventName": "PM_DPTEG_FROM_L3.1_ECO_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" + "EventName": "PM_DPTEG_FROM_L31_ECO_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, {, - "EventCode": "0x4E04A", - "EventName": "PM_DPTEG_FROM_OFF_CHIP_CACHE", - "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" + "EventCode": "0x40B8", + "EventName": "PM_BR_MPRED_TAKEN_CR", + "BriefDescription": "A Conditional Branch that resolved to taken was mispredicted as not taken (due to the BHT Direction Prediction)." }, {, - "EventCode": "0x40154", - "EventName": "PM_MRK_FAB_RSP_BKILL", - "BriefDescription": "Marked store had to do a bkill", - "PublicDescription": "" + "EventCode": "0xF8AC", + "EventName": "PM_DC_DEALLOC_NO_CONF", + "BriefDescription": "A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software.Fuzzy stream confirm (out of order effects, or pf cant keep up)" }, {, - "EventCode": "0x4C054", - "EventName": "PM_DERAT_MISS_16G", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G", - "PublicDescription": "" + "EventCode": "0xD090", + "EventName": "PM_LS0_DC_COLLISIONS", + "BriefDescription": "Read-write data cache collisions" }, {, - "EventCode": "0x4C05A", - "EventName": "PM_DTLB_MISS_1G", - "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used", - "PublicDescription": "" + "EventCode": "0x40BC", + "EventName": "PM_THRD_PRIO_0_1_CYC", + "BriefDescription": "Cycles thread running at priority level 0 or 1" }, {, - "EventCode": "0x44054", - "EventName": "PM_VECTOR_LD_CMPL", - "BriefDescription": "Number of vector load instructions completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D05E", - "EventName": "PM_BR_CMPL", - "BriefDescription": "Any Branch instruction completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x45054", - "EventName": "PM_FMA_CMPL", - "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. ", - "PublicDescription": "" - }, - {, - "EventCode": "0x45056", - "EventName": "PM_SCALAR_FLOP_CMPL", - "BriefDescription": "Scalar flop operation completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x4505C", - "EventName": "PM_MATH_FLOP_CMPL", - "BriefDescription": "Math flop instruction completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E05E", - "EventName": "PM_TM_OUTER_TBEGIN_DISP", - "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions", - "PublicDescription": "" + "EventCode": "0x2084", + "EventName": "PM_FLUSH_HB_RESTORE_CYC", + "BriefDescription": "Cycles in which no new instructions can be dispatched to the ICT after a flush. History buffer recovery" }, {, "EventCode": "0x4F054", "EventName": "PM_RADIX_PWC_MISS", - "BriefDescription": "A radix translation attempt missed in the TLB and all levels of page walk cache.", - "PublicDescription": "" + "BriefDescription": "A radix translation attempt missed in the TLB and all levels of page walk cache." }, {, - "EventCode": "0x4F05C", - "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from beyond the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation. The source could be local/remote/distant memory or another core's cache", - "PublicDescription": "" + "EventCode": "0x24048", + "EventName": "PM_INST_FROM_LMEM", + "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0xD8B4", + "EventName": "PM_LSU0_LRQ_S0_VALID_CYC", + "BriefDescription": "Slot 0 of LRQ valid" + }, + {, + "EventCode": "0x2E052", + "EventName": "PM_TM_PASSED", + "BriefDescription": "Number of TM transactions that passed" + }, + {, + "EventCode": "0xD1A0", + "EventName": "PM_MRK_LSU_FLUSH_LHS", + "BriefDescription": "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed" + }, + {, + "EventCode": "0xF088", + "EventName": "PM_LSU0_STORE_REJECT", + "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" + }, + {, + "EventCode": "0x360B2", + "EventName": "PM_L3_GRP_GUESS_WRONG_LOW", + "BriefDescription": "Initial scope=group (GS or NNS) but data from outside group (far or rem). Prediction too Low" + }, + {, + "EventCode": "0x168A6", + "EventName": "PM_TM_CAM_OVERFLOW", + "BriefDescription": "L3 TM cam overflow during L2 co of SC" + }, + {, + "EventCode": "0xE8B0", + "EventName": "PM_TEND_PEND_CYC", + "BriefDescription": "TEND latency per thread" + }, + {, + "EventCode": "0x4884", + "EventName": "PM_IBUF_FULL_CYC", + "BriefDescription": "Cycles No room in ibuff" + }, + {, + "EventCode": "0xD08C", + "EventName": "PM_LSU2_LDMX_FIN", + "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])" + }, + {, + "EventCode": "0x300F8", + "EventName": "PM_TB_BIT_TRANS", + "BriefDescription": "timebase event" + }, + {, + "EventCode": "0x3C040", + "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a demand load" + }, + {, + "EventCode": "0xE0BC", + "EventName": "PM_LS0_PTE_TABLEWALK_CYC", + "BriefDescription": "Cycles when a tablewalk is pending on this thread on table 0" + }, + {, + "EventCode": "0x3884", + "EventName": "PM_ISU3_ISS_HOLD_ALL", + "BriefDescription": "All ISU rejects" + }, + {, + "EventCode": "0x460A6", + "EventName": "PM_RD_FORMING_SC", + "BriefDescription": "Read forming SC" + }, + {, + "EventCode": "0x468A0", + "EventName": "PM_L3_PF_OFF_CHIP_MEM", + "BriefDescription": "L3 PF from Off chip memory" + }, + {, + "EventCode": "0x268AA", + "EventName": "PM_L3_P1_LCO_DATA", + "BriefDescription": "LCO sent with data port 1" + }, + {, + "EventCode": "0xE894", + "EventName": "PM_LSU1_TM_L1_HIT", + "BriefDescription": "Load tm hit in L1" + }, + {, + "EventCode": "0x5888", + "EventName": "PM_IC_INVALIDATE", + "BriefDescription": "Ic line invalidated" + }, + {, + "EventCode": "0x2890", + "EventName": "PM_DISP_CLB_HELD_TLBIE", + "BriefDescription": "Dispatch Hold: Due to TLBIE" + }, + {, + "EventCode": "0x1001C", + "EventName": "PM_CMPLU_STALL_THRD", + "BriefDescription": "Completion Stalled because the thread was blocked" + }, + {, + "EventCode": "0x368A6", + "EventName": "PM_SNP_TM_HIT_T", + "BriefDescription": "Snp TM sthit T/Tn/Te" + }, + {, + "EventCode": "0x3001A", + "EventName": "PM_DATA_TABLEWALK_CYC", + "BriefDescription": "Data Tablewalk Cycles. Could be 1 or 2 active tablewalks. Includes data prefetches." + }, + {, + "EventCode": "0xD894", + "EventName": "PM_LS3_DC_COLLISIONS", + "BriefDescription": "Read-write data cache collisions" + }, + {, + "EventCode": "0x35158", + "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD_CYC", + "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load" + }, + {, + "EventCode": "0xF0B4", + "EventName": "PM_DC_PREF_CONS_ALLOC", + "BriefDescription": "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch" + }, + {, + "EventCode": "0xF894", + "EventName": "PM_LSU3_L1_CAM_CANCEL", + "BriefDescription": "ls3 l1 tm cam cancel" + }, + {, + "EventCode": "0x2888", + "EventName": "PM_FLUSH_DISP_TLBIE", + "BriefDescription": "Dispatch Flush: TLBIE" + }, + {, + "EventCode": "0xD1A4", + "EventName": "PM_MRK_LSU_FLUSH_SAO", + "BriefDescription": "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush" + }, + {, + "EventCode": "0x4E11E", + "EventName": "PM_MRK_DATA_FROM_DMEM_CYC", + "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load" + }, + {, + "EventCode": "0x5894", + "EventName": "PM_LWSYNC", + "BriefDescription": "Lwsync instruction decoded and transferred" + }, + {, + "EventCode": "0x14156", + "EventName": "PM_MRK_DATA_FROM_L2_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L2 due to a marked load" + }, + {, + "EventCode": "0x468A6", + "EventName": "PM_RD_CLEARING_SC", + "BriefDescription": "Read clearing SC" + }, + {, + "EventCode": "0x50A0", + "EventName": "PM_HWSYNC", + "BriefDescription": "Hwsync instruction decoded and transferred" + }, + {, + "EventCode": "0x168B0", + "EventName": "PM_L3_P1_NODE_PUMP", + "BriefDescription": "L3 PF sent with nodal scope port 1, counts even retried requests" + }, + {, + "EventCode": "0xD0BC", + "EventName": "PM_LSU0_1_LRQF_FULL_CYC", + "BriefDescription": "Counts the number of cycles the LRQF is full. LRQF is the queue that holds loads between finish and completion. If it fills up, instructions stay in LRQ until completion, potentially backing up the LRQ" + }, + {, + "EventCode": "0x2D148", + "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load" + }, + {, + "EventCode": "0x460A8", + "EventName": "PM_SN_HIT", + "BriefDescription": "Any port snooper hit L3. Up to 4 can happen in a cycle but we only count 1" + }, + {, + "EventCode": "0x360AA", + "EventName": "PM_L3_P0_CO_MEM", + "BriefDescription": "L3 CO to memory port 0 with or without data" + }, + {, + "EventCode": "0xF0A4", + "EventName": "PM_DC_PREF_HW_ALLOC", + "BriefDescription": "Prefetch stream allocated by the hardware prefetch mechanism" + }, + {, + "EventCode": "0xF0BC", + "EventName": "PM_LS2_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, + "EventCode": "0xD0AC", + "EventName": "PM_SRQ_SYNC_CYC", + "BriefDescription": "A sync is in the S2Q (edge detect to count)" }, {, "EventCode": "0x401E6", "EventName": "PM_MRK_INST_FROM_L3MISS", - "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet", - "PublicDescription": "" + "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet" }, {, - "EventCode": "0x401E8", - "EventName": "PM_MRK_DATA_FROM_L2MISS", - "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a marked load", - "PublicDescription": "" + "EventCode": "0x26082", + "EventName": "PM_L2_IC_INV", + "BriefDescription": "I-cache Invalidates sent over the realod bus to the core" + }, + {, + "EventCode": "0xC8AC", + "EventName": "PM_LSU_FLUSH_RELAUNCH_MISS", + "BriefDescription": "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent" + }, + {, + "EventCode": "0x260A4", + "EventName": "PM_L3_LD_HIT", + "BriefDescription": "L3 Hits for demand LDs" + }, + {, + "EventCode": "0xF0A0", + "EventName": "PM_DATA_STORE", + "BriefDescription": "All ops that drain from s2q to L2 containing data" + }, + {, + "EventCode": "0x1D148", + "EventName": "PM_MRK_DATA_FROM_RMEM", + "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a marked load" + }, + {, + "EventCode": "0x16088", + "EventName": "PM_L2_LOC_GUESS_CORRECT", + "BriefDescription": "L2 guess local (LNS) and guess was correct (ie data local)" + }, + {, + "EventCode": "0x160A4", + "EventName": "PM_L3_HIT", + "BriefDescription": "L3 Hits (L2 miss hitting L3, including data/instrn/xlate)" + }, + {, + "EventCode": "0xE09C", + "EventName": "PM_LSU0_TM_L1_MISS", + "BriefDescription": "Load tm L1 miss" + }, + {, + "EventCode": "0x168B4", + "EventName": "PM_L3_P1_LCO_RTY", + "BriefDescription": "L3 initiated LCO received retry on port 1 (can try 4 times)" + }, + {, + "EventCode": "0x268AC", + "EventName": "PM_L3_RD_USAGE", + "BriefDescription": "Rotating sample of 16 RD actives" + }, + {, + "EventCode": "0x1415C", + "EventName": "PM_MRK_DATA_FROM_L3_MEPF_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L3 without dispatch conflicts hit on Mepf state due to a marked load" + }, + {, + "EventCode": "0xE880", + "EventName": "PM_L1_SW_PREF", + "BriefDescription": "Software L1 Prefetches, including SW Transient Prefetches" + }, + {, + "EventCode": "0x288C", + "EventName": "PM_DISP_CLB_HELD_BAL", + "BriefDescription": "Dispatch/CLB Hold: Balance Flush" + }, + {, + "EventCode": "0x101EA", + "EventName": "PM_MRK_L1_RELOAD_VALID", + "BriefDescription": "Marked demand reload" + }, + {, + "EventCode": "0x1D156", + "EventName": "PM_MRK_LD_MISS_L1_CYC", + "BriefDescription": "Marked ld latency" + }, + {, + "EventCode": "0x4C01A", + "EventName": "PM_CMPLU_STALL_DMISS_L3MISS", + "BriefDescription": "Completion stall due to cache miss resolving missed the L3" + }, + {, + "EventCode": "0x2006C", + "EventName": "PM_RUN_CYC_SMT4_MODE", + "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT4 mode" + }, + {, + "EventCode": "0x5088", + "EventName": "PM_DECODE_FUSION_OP_PRESERV", + "BriefDescription": "Destructive op operand preservation" + }, + {, + "EventCode": "0x1D14E", + "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC", + "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load" + }, + {, + "EventCode": "0x509C", + "EventName": "PM_FORCED_NOP", + "BriefDescription": "Instruction was forced to execute as a nop because it was found to behave like a nop (have no effect) at decode time" + }, + {, + "EventCode": "0xC098", + "EventName": "PM_LS2_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, + "EventCode": "0x20058", + "EventName": "PM_DARQ1_10_12_ENTRIES", + "BriefDescription": "Cycles in which 10 or more DARQ1 entries (out of 12) are in use" + }, + {, + "EventCode": "0x360A6", + "EventName": "PM_SNP_TM_HIT_M", + "BriefDescription": "Snp TM st hit M/Mu" + }, + {, + "EventCode": "0x5898", + "EventName": "PM_LINK_STACK_INVALID_PTR", + "BriefDescription": "It is most often caused by certain types of flush where the pointer is not available. Can result in the data in the link stack becoming unusable." + }, + {, + "EventCode": "0x46088", + "EventName": "PM_L2_CHIP_PUMP", + "BriefDescription": "RC requests that were local (aka chip) pump attempts" + }, + {, + "EventCode": "0x28A0", + "EventName": "PM_TM_TSUSPEND", + "BriefDescription": "TM suspend instruction completed" + }, + {, + "EventCode": "0x20054", + "EventName": "PM_L1_PREF", + "BriefDescription": "A data line was written to the L1 due to a hardware or software prefetch" + }, + {, + "EventCode": "0xF888", + "EventName": "PM_LSU1_STORE_REJECT", + "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" + }, + {, + "EventCode": "0x4505E", + "EventName": "PM_FLOP_CMPL", + "BriefDescription": "Floating Point Operation Finished" + }, + {, + "EventCode": "0x1D144", + "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a marked load" }, {, "EventCode": "0x400FA", "EventName": "PM_RUN_INST_CMPL", - "BriefDescription": "Run_Instructions", - "PublicDescription": "" + "BriefDescription": "Run_Instructions" + }, + {, + "EventCode": "0x15154", + "EventName": "PM_SYNC_MRK_L3MISS", + "BriefDescription": "Marked L3 misses that can throw a synchronous interrupt" + }, + {, + "EventCode": "0xE0B4", + "EventName": "PM_LS0_TM_DISALLOW", + "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it" + }, + {, + "EventCode": "0x26884", + "EventName": "PM_DSIDE_MRU_TOUCH", + "BriefDescription": "D-side L2 MRU touch sent to L2" + }, + {, + "EventCode": "0x30134", + "EventName": "PM_MRK_ST_CMPL_INT", + "BriefDescription": "marked store finished with intervention" + }, + {, + "EventCode": "0xC0B8", + "EventName": "PM_LSU_FLUSH_SAO", + "BriefDescription": "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush" + }, + {, + "EventCode": "0x50A8", + "EventName": "PM_EAT_FORCE_MISPRED", + "BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicated target to validate. This is a rare case that may occur when the EAT is full and a branch is issued" + }, + {, + "EventCode": "0xC094", + "EventName": "PM_LS0_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, + "EventCode": "0xF8BC", + "EventName": "PM_LS3_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, + "EventCode": "0x58B0", + "EventName": "PM_BTAC_GOOD_RESULT", + "BriefDescription": "BTAC predicts a taken branch and the BHT agrees, and the target address is correct" + }, + {, + "EventCode": "0x1C04C", + "EventName": "PM_DATA_FROM_LL4", + "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a demand load" + }, + {, + "EventCode": "0x3608E", + "EventName": "PM_TM_ST_CONF", + "BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)" + }, + {, + "EventCode": "0xD998", + "EventName": "PM_MRK_LSU_FLUSH_EMSH", + "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address" + }, + {, + "EventCode": "0xF8A0", + "EventName": "PM_NON_DATA_STORE", + "BriefDescription": "All ops that drain from s2q to L2 and contain no data" + }, + {, + "EventCode": "0x3F146", + "EventName": "PM_MRK_DPTEG_FROM_L21_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x40A0", + "EventName": "PM_BR_UNCOND", + "BriefDescription": "Unconditional Branch Completed. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was covenrted to a Resolve." + }, + {, + "EventCode": "0x1F056", + "EventName": "PM_RADIX_PWC_L1_HIT", + "BriefDescription": "A radix translation attempt missed in the TLB and only the first level page walk cache was a hit." + }, + {, + "EventCode": "0xF8A8", + "EventName": "PM_DC_PREF_FUZZY_CONF", + "BriefDescription": "A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software.Fuzzy stream confirm (out of order effects, or pf cant keep up)" + }, + {, + "EventCode": "0xF8A4", + "EventName": "PM_DC_PREF_SW_ALLOC", + "BriefDescription": "Prefetch stream allocated by software prefetching" + }, + {, + "EventCode": "0xE0A0", + "EventName": "PM_LSU2_TM_L1_MISS", + "BriefDescription": "Load tm L1 miss" + }, + {, + "EventCode": "0x2894", + "EventName": "PM_TM_OUTER_TEND", + "BriefDescription": "Completion time outer tend" + }, + {, + "EventCode": "0xF098", + "EventName": "PM_XLATE_HPT_MODE", + "BriefDescription": "LSU reports every cycle the thread is in HPT translation mode (as opposed to radix mode)" + }, + {, + "EventCode": "0x2C04E", + "EventName": "PM_LD_MISS_L1_FIN", + "BriefDescription": "Number of load instructions that finished with an L1 miss. Note that even if a load spans multiple slices this event will increment only once per load op." + }, + {, + "EventCode": "0x30162", + "EventName": "PM_MRK_LSU_DERAT_MISS", + "BriefDescription": "Marked derat reload (miss) for any page size" + }, + {, + "EventCode": "0x160A0", + "EventName": "PM_L3_PF_MISS_L3", + "BriefDescription": "L3 PF missed in L3" + }, + {, + "EventCode": "0x1C04A", + "EventName": "PM_DATA_FROM_RL2L3_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load" + }, + {, + "EventCode": "0xD99C", + "EventName": "PM_MRK_LSU_FLUSH_UE", + "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time" + }, + {, + "EventCode": "0x268B0", + "EventName": "PM_L3_P1_GRP_PUMP", + "BriefDescription": "L3 PF sent with grp scope port 1, counts even retried requests" + }, + {, + "EventCode": "0x30016", + "EventName": "PM_CMPLU_STALL_SRQ_FULL", + "BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full" + }, + {, + "EventCode": "0x40B4", + "EventName": "PM_BR_PRED_TA", + "BriefDescription": "Conditional Branch Completed that had its target address predicted. Only XL-form branches set this event. This equal the sum of CCACHE, LSTACK, and PCACHE" + }, + {, + "EventCode": "0x40AC", + "EventName": "PM_BR_MPRED_CCACHE", + "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Count Cache Target Prediction" + }, + {, + "EventCode": "0x3688A", + "EventName": "PM_L2_RTY_LD", + "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)" + }, + {, + "EventCode": "0x3689E", + "EventName": "PM_L2_RTY_LD", + "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)" + }, + {, + "EventCode": "0xE08C", + "EventName": "PM_LSU0_ERAT_HIT", + "BriefDescription": "Primary ERAT hit. There is no secondary ERAT" + }, + {, + "EventCode": "0xE088", + "EventName": "PM_LS2_ERAT_MISS_PREF", + "BriefDescription": "LS0 Erat miss due to prefetch" + }, + {, + "EventCode": "0xF0A8", + "EventName": "PM_DC_PREF_CONF", + "BriefDescription": "A demand load referenced a line in an active prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Includes forwards and backwards streams" + }, + {, + "EventCode": "0x16888", + "EventName": "PM_L2_LOC_GUESS_WRONG", + "BriefDescription": "L2 guess local (LNS) and guess was not correct (ie data not on chip)" + }, + {, + "EventCode": "0xE0A4", + "EventName": "PM_TMA_REQ_L2", + "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding" + }, + {, + "EventCode": "0x5884", + "EventName": "PM_DECODE_LANES_NOT_AVAIL", + "BriefDescription": "Decode has something to transmit but dispatch lanes are not available" + }, + {, + "EventCode": "0x3C042", + "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load" + }, + {, + "EventCode": "0x168AA", + "EventName": "PM_L3_P1_LCO_NO_DATA", + "BriefDescription": "Dataless L3 LCO sent port 1" + }, + {, + "EventCode": "0x3D140", + "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L2 with dispatch conflict due to a marked load" + }, + {, + "EventCode": "0xC89C", + "EventName": "PM_LS1_LAUNCH_HELD_PREF", + "BriefDescription": "Number of times a load or store instruction was unable to launch/relaunch because a high priority prefetch used that relaunch cycle" + }, + {, + "EventCode": "0x4894", + "EventName": "PM_IC_RELOAD_PRIVATE", + "BriefDescription": "Reloading line was brought in private for a specific thread. Most lines are brought in shared for all eight threads. If RA does not match then invalidates and then brings it shared to other thread. In P7 line brought in private , then line was invalidat" + }, + {, + "EventCode": "0x1688E", + "EventName": "PM_TM_LD_CAUSED_FAIL", + "BriefDescription": "Non-TM Load caused any thread to fail" + }, + {, + "EventCode": "0x26084", + "EventName": "PM_L2_RCLD_DISP_FAIL_OTHER", + "BriefDescription": "All I-or-D side load dispatch attempts for this thread that failed due to reason other than address collision (excludes i_l2mru_tch_reqs)" + }, + {, + "EventCode": "0x101E4", + "EventName": "PM_MRK_L1_ICACHE_MISS", + "BriefDescription": "sampled Instruction suffered an icache Miss" + }, + {, + "EventCode": "0x20A0", + "EventName": "PM_TM_NESTED_TBEGIN", + "BriefDescription": "Completion Tm nested tbegin" + }, + {, + "EventCode": "0x368AA", + "EventName": "PM_L3_P1_CO_MEM", + "BriefDescription": "L3 CO to memory port 1 with or without data" + }, + {, + "EventCode": "0xC8A4", + "EventName": "PM_LSU3_FALSE_LHS", + "BriefDescription": "False LHS match detected" + }, + {, + "EventCode": "0xD9A4", + "EventName": "PM_MRK_LSU_FLUSH_LARX_STCX", + "BriefDescription": "A larx is flushed because an older larx has an LMQ reservation for the same thread. A stcx is flushed because an older stcx is in the LMQ. The flush happens when the older larx/stcx relaunches" + }, + {, + "EventCode": "0x4D012", + "EventName": "PM_PMC3_SAVED", + "BriefDescription": "PMC3 Rewind Value saved" + }, + {, + "EventCode": "0xE888", + "EventName": "PM_LS3_ERAT_MISS_PREF", + "BriefDescription": "LS1 Erat miss due to prefetch" + }, + {, + "EventCode": "0x368B4", + "EventName": "PM_L3_RD0_BUSY", + "BriefDescription": "Lifetime, sample of RD machine 0 valid" + }, + {, + "EventCode": "0x468B4", + "EventName": "PM_L3_RD0_BUSY", + "BriefDescription": "Lifetime, sample of RD machine 0 valid" + }, + {, + "EventCode": "0x46080", + "EventName": "PM_L2_DISP_ALL_L2MISS", + "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)" + }, + {, + "EventCode": "0xF8B8", + "EventName": "PM_LS1_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, + "EventCode": "0x408C", + "EventName": "PM_L1_DEMAND_WRITE", + "BriefDescription": "Instruction Demand sectors written into IL1" + }, + {, + "EventCode": "0x368A8", + "EventName": "PM_SN_INVL", + "BriefDescription": "Any port snooper detects a store to a line in the Sx state and invalidates the line. Up to 4 can happen in a cycle but we only count 1" + }, + {, + "EventCode": "0x160B2", + "EventName": "PM_L3_LOC_GUESS_CORRECT", + "BriefDescription": "initial scope=node/chip (LNS) and data from local node (local) (pred successful) - always PFs only" + }, + {, + "EventCode": "0x48B4", + "EventName": "PM_DECODE_FUSION_CONST_GEN", + "BriefDescription": "32-bit constant generation" + }, + {, + "EventCode": "0x4D146", + "EventName": "PM_MRK_DATA_FROM_L21_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a marked load" + }, + {, + "EventCode": "0xE080", + "EventName": "PM_S2Q_FULL", + "BriefDescription": "Cycles during which the S2Q is full" + }, + {, + "EventCode": "0x268B4", + "EventName": "PM_L3_P3_LCO_RTY", + "BriefDescription": "L3 initiated LCO received retry on port 3 (can try 4 times)" + }, + {, + "EventCode": "0xD8B8", + "EventName": "PM_LSU0_LMQ_S0_VALID", + "BriefDescription": "Slot 0 of LMQ valid" + }, + {, + "EventCode": "0x2098", + "EventName": "PM_TM_NESTED_TEND", + "BriefDescription": "Completion time nested tend" + }, + {, + "EventCode": "0x36084", + "EventName": "PM_L2_RCST_DISP", + "BriefDescription": "All D-side store dispatch attempts for this thread" + }, + {, + "EventCode": "0x368A0", + "EventName": "PM_L3_PF_OFF_CHIP_CACHE", + "BriefDescription": "L3 PF from Off chip cache" + }, + {, + "EventCode": "0x20056", + "EventName": "PM_TAKEN_BR_MPRED_CMPL", + "BriefDescription": "Total number of taken branches that were incorrectly predicted as not-taken. This event counts branches completed and does not include speculative instructions" + }, + {, + "EventCode": "0x4688A", + "EventName": "PM_L2_SYS_PUMP", + "BriefDescription": "RC requests that were system pump attempts" + }, + {, + "EventCode": "0xE090", + "EventName": "PM_LSU2_ERAT_HIT", + "BriefDescription": "Primary ERAT hit. There is no secondary ERAT" + }, + {, + "EventCode": "0x4001C", + "EventName": "PM_INST_IMC_MATCH_CMPL", + "BriefDescription": "IMC Match Count" + }, + {, + "EventCode": "0x40A8", + "EventName": "PM_BR_PRED_LSTACK", + "BriefDescription": "Conditional Branch Completed that used the Link Stack for Target Prediction" + }, + {, + "EventCode": "0x268A2", + "EventName": "PM_L3_CI_MISS", + "BriefDescription": "L3 castins miss (total count)" + }, + {, + "EventCode": "0x289C", + "EventName": "PM_TM_NON_FAV_TBEGIN", + "BriefDescription": "Dispatch time non favored tbegin" + }, + {, + "EventCode": "0xF08C", + "EventName": "PM_LSU2_STORE_REJECT", + "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" + }, + {, + "EventCode": "0x360A0", + "EventName": "PM_L3_PF_ON_CHIP_CACHE", + "BriefDescription": "L3 PF from On chip cache" + }, + {, + "EventCode": "0x35152", + "EventName": "PM_MRK_DATA_FROM_L2MISS_CYC", + "BriefDescription": "Duration in cycles to reload from a location other than the local core's L2 due to a marked load" + }, + {, + "EventCode": "0x160AC", + "EventName": "PM_L3_SN_USAGE", + "BriefDescription": "Rotating sample of 16 snoop valids" + }, + {, + "EventCode": "0x16084", + "EventName": "PM_L2_RCLD_DISP", + "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" + }, + {, + "EventCode": "0x1608C", + "EventName": "PM_RC0_BUSY", + "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)" + }, + {, + "EventCode": "0x2608C", + "EventName": "PM_RC0_BUSY", + "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)" + }, + {, + "EventCode": "0x36082", + "EventName": "PM_L2_LD_DISP", + "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)." + }, + {, + "EventCode": "0x1609E", + "EventName": "PM_L2_LD_DISP", + "BriefDescription": "All successful D side load dispatches for this thread (L2 miss + L2 hits)" + }, + {, + "EventCode": "0xF8B0", + "EventName": "PM_L3_SW_PREF", + "BriefDescription": "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest" + }, + {, + "EventCode": "0xF884", + "EventName": "PM_TABLEWALK_CYC_PREF", + "BriefDescription": "tablewalk qualified for pte prefetches" + }, + {, + "EventCode": "0x4D144", + "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a marked load" + }, + {, + "EventCode": "0x16884", + "EventName": "PM_L2_RCLD_DISP_FAIL_ADDR", + "BriefDescription": "All I-od-D side load dispatch attempts for this thread that failed due to address collision with RC/CO/SN/SQ machine (excludes i_l2mru_tch_reqs)" + }, + {, + "EventCode": "0x460A0", + "EventName": "PM_L3_PF_ON_CHIP_MEM", + "BriefDescription": "L3 PF from On chip memory" + }, + {, + "EventCode": "0xF084", + "EventName": "PM_PTE_PREFETCH", + "BriefDescription": "PTE prefetches" + }, + {, + "EventCode": "0x2D026", + "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L2", + "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L2 data cache" + }, + {, + "EventCode": "0x48B0", + "EventName": "PM_BR_MPRED_PCACHE", + "BriefDescription": "Conditional Branch Completed that was Mispredicted due to pattern cache prediction" + }, + {, + "EventCode": "0x2C126", + "EventName": "PM_MRK_DATA_FROM_L2", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a marked load" + }, + {, + "EventCode": "0xE0AC", + "EventName": "PM_TM_FAIL_TLBIE", + "BriefDescription": "Transaction failed because there was a TLBIE hit in the bloom filter" + }, + {, + "EventCode": "0x260AA", + "EventName": "PM_L3_P0_LCO_DATA", + "BriefDescription": "LCO sent with data port 0" + }, + {, + "EventCode": "0x4888", + "EventName": "PM_IC_PREF_REQ", + "BriefDescription": "Instruction prefetch requests" + }, + {, + "EventCode": "0xC898", + "EventName": "PM_LS3_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, + "EventCode": "0x488C", + "EventName": "PM_IC_PREF_WRITE", + "BriefDescription": "Instruction prefetch written into IL1" + }, + {, + "EventCode": "0xF89C", + "EventName": "PM_XLATE_MISS", + "BriefDescription": "The LSU requested a line from L2 for translation. It may be satisfied from any source beyond L2. Includes speculative instructions" + }, + {, + "EventCode": "0x14158", + "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L2 without conflict due to a marked load" + }, + {, + "EventCode": "0x35156", + "EventName": "PM_MRK_DATA_FROM_L31_SHR_CYC", + "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load" + }, + {, + "EventCode": "0x268A6", + "EventName": "PM_TM_RST_SC", + "BriefDescription": "TM-snp rst RM SC" + }, + {, + "EventCode": "0x468A4", + "EventName": "PM_L3_TRANS_PF", + "BriefDescription": "L3 Transient prefetch received from L2" + }, + {, + "EventCode": "0x4094", + "EventName": "PM_IC_PREF_CANCEL_L2", + "BriefDescription": "L2 Squashed a demand or prefetch request" + }, + {, + "EventCode": "0x48AC", + "EventName": "PM_BR_MPRED_LSTACK", + "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Link Stack Target Prediction" + }, + {, + "EventCode": "0xE88C", + "EventName": "PM_LSU1_ERAT_HIT", + "BriefDescription": "Primary ERAT hit. There is no secondary ERAT" + }, + {, + "EventCode": "0xC0B4", + "EventName": "PM_LSU_FLUSH_WRK_ARND", + "BriefDescription": "LSU workaround flush. These flushes are setup with programmable scan only latches to perform various actions when the flush macro receives a trigger from the dbg macros. These actions include things like flushing the next op encountered for a particular thread or flushing the next op that is NTC op that is encountered on a particular slice. The kind of flush that the workaround is setup to perform is highly variable." + }, + {, + "EventCode": "0x34054", + "EventName": "PM_PARTIAL_ST_FIN", + "BriefDescription": "Any store finished by an LSU slice" + }, + {, + "EventCode": "0x5880", + "EventName": "PM_THRD_PRIO_6_7_CYC", + "BriefDescription": "Cycles thread running at priority level 6 or 7" + }, + {, + "EventCode": "0x4898", + "EventName": "PM_IC_DEMAND_L2_BR_REDIRECT", + "BriefDescription": "L2 I cache demand request due to branch Mispredict ( 15 cycle path)" + }, + {, + "EventCode": "0x4880", + "EventName": "PM_BANK_CONFLICT", + "BriefDescription": "Read blocked due to interleave conflict. The ifar logic will detect an interleave conflict and kill the data that was read that cycle." + }, + {, + "EventCode": "0x360B0", + "EventName": "PM_L3_P0_SYS_PUMP", + "BriefDescription": "L3 PF sent with sys scope port 0, counts even retried requests" + }, + {, + "EventCode": "0x3006A", + "EventName": "PM_IERAT_RELOAD_64K", + "BriefDescription": "IERAT Reloaded (Miss) for a 64k page" + }, + {, + "EventCode": "0xD8BC", + "EventName": "PM_LSU2_3_LRQF_FULL_CYC", + "BriefDescription": "Counts the number of cycles the LRQF is full. LRQF is the queue that holds loads between finish and completion. If it fills up, instructions stay in LRQ until completion, potentially backing up the LRQ" + }, + {, + "EventCode": "0x46086", + "EventName": "PM_L2_SN_M_RD_DONE", + "BriefDescription": "SNP dispatched for a read and was M (true M)" + }, + {, + "EventCode": "0x40154", + "EventName": "PM_MRK_FAB_RSP_BKILL", + "BriefDescription": "Marked store had to do a bkill" + }, + {, + "EventCode": "0xF094", + "EventName": "PM_LSU2_L1_CAM_CANCEL", + "BriefDescription": "ls2 l1 tm cam cancel" + }, + {, + "EventCode": "0x2D014", + "EventName": "PM_CMPLU_STALL_LRQ_FULL", + "BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ (load-store address queue) because the LRQ (load-reorder queue) was full" + }, + {, + "EventCode": "0x3E05E", + "EventName": "PM_L3_CO_MEPF", + "BriefDescription": "L3 castouts in Mepf state for this thread" + }, + {, + "EventCode": "0x168A0", + "EventName": "PM_L3_CO_MEPF", + "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request" + }, + {, + "EventCode": "0x460A2", + "EventName": "PM_L3_LAT_CI_HIT", + "BriefDescription": "L3 Lateral Castins Hit" + }, + {, + "EventCode": "0x3D14E", + "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load" + }, + {, + "EventCode": "0x3D15E", + "EventName": "PM_MULT_MRK", + "BriefDescription": "mult marked instr" + }, + {, + "EventCode": "0x4084", + "EventName": "PM_EAT_FULL_CYC", + "BriefDescription": "Cycles No room in EAT" + }, + {, + "EventCode": "0x5098", + "EventName": "PM_LINK_STACK_WRONG_ADD_PRED", + "BriefDescription": "Link stack predicts wrong address, because of link stack design limitation or software violating the coding conventions" + }, + {, + "EventCode": "0x2C050", + "EventName": "PM_DATA_GRP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for a demand load" + }, + {, + "EventCode": "0xC0A4", + "EventName": "PM_LSU2_FALSE_LHS", + "BriefDescription": "False LHS match detected" + }, + {, + "EventCode": "0x58A0", + "EventName": "PM_LINK_STACK_CORRECT", + "BriefDescription": "Link stack predicts right address" + }, + {, + "EventCode": "0x4C05A", + "EventName": "PM_DTLB_MISS_1G", + "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used" + }, + {, + "EventCode": "0x36886", + "EventName": "PM_L2_SN_SX_I_DONE", + "BriefDescription": "SNP dispatched and went from Sx to Ix" + }, + {, + "EventCode": "0x4E04A", + "EventName": "PM_DPTEG_FROM_OFF_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x2C12C", + "EventName": "PM_MRK_DATA_FROM_DL4_CYC", + "BriefDescription": "Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load" + }, + {, + "EventCode": "0x2608E", + "EventName": "PM_TM_LD_CONF", + "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)" + }, + {, + "EventCode": "0x4080", + "EventName": "PM_INST_FROM_L1", + "BriefDescription": "Instruction fetches from L1. L1 instruction hit" + }, + {, + "EventCode": "0xE898", + "EventName": "PM_LSU3_TM_L1_HIT", + "BriefDescription": "Load tm hit in L1" + }, + {, + "EventCode": "0x260A0", + "EventName": "PM_L3_CO_MEM", + "BriefDescription": "L3 CO to memory OR of port 0 and 1 (lossy = may undercount if two cresp come in the same cyc)" + }, + {, + "EventCode": "0x16082", + "EventName": "PM_L2_CASTOUT_MOD", + "BriefDescription": "L2 Castouts - Modified (M,Mu,Me)" + }, + {, + "EventCode": "0xC09C", + "EventName": "PM_LS0_LAUNCH_HELD_PREF", + "BriefDescription": "Number of times a load or store instruction was unable to launch/relaunch because a high priority prefetch used that relaunch cycle" + }, + {, + "EventCode": "0xC8B8", + "EventName": "PM_LSU_FLUSH_LARX_STCX", + "BriefDescription": "A larx is flushed because an older larx has an LMQ reservation for the same thread. A stcx is flushed because an older stcx is in the LMQ. The flush happens when the older larx/stcx relaunches" + }, + {, + "EventCode": "0x260A6", + "EventName": "PM_NON_TM_RST_SC", + "BriefDescription": "Non-TM snp rst TM SC" + }, + {, + "EventCode": "0x3608A", + "EventName": "PM_L2_RTY_ST", + "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)" + }, + {, + "EventCode": "0x4689E", + "EventName": "PM_L2_RTY_ST", + "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)" + }, + {, + "EventCode": "0x24040", + "EventName": "PM_INST_FROM_L2_MEPF", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x209C", + "EventName": "PM_TM_FAV_TBEGIN", + "BriefDescription": "Dispatch time Favored tbegin" + }, + {, + "EventCode": "0x2D01E", + "EventName": "PM_ICT_NOSLOT_DISP_HELD_ISSQ", + "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full" + }, + {, + "EventCode": "0x50A4", + "EventName": "PM_FLUSH_MPRED", + "BriefDescription": "Branch mispredict flushes. Includes target and address misprecition" + }, + {, + "EventCode": "0x508C", + "EventName": "PM_SHL_CREATED", + "BriefDescription": "Store-Hit-Load Table Entry Created" + }, + {, + "EventCode": "0x1504C", + "EventName": "PM_IPTEG_FROM_LL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a instruction side request" + }, + {, + "EventCode": "0x268A4", + "EventName": "PM_L3_LD_MISS", + "BriefDescription": "L3 Misses for demand LDs" + }, + {, + "EventCode": "0x26088", + "EventName": "PM_L2_GRP_GUESS_CORRECT", + "BriefDescription": "L2 guess grp (GS or NNS) and guess was correct (data intra-group AND ^on-chip)" + }, + {, + "EventCode": "0xD088", + "EventName": "PM_LSU0_LDMX_FIN", + "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])." + }, + {, + "EventCode": "0xE8B4", + "EventName": "PM_LS1_TM_DISALLOW", + "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it" + }, + {, + "EventCode": "0x1688C", + "EventName": "PM_RC_USAGE", + "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each RC machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running" + }, + {, + "EventCode": "0x3F054", + "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from beyond the core's L3 data cache. This is the deepest level of PWC possible for a translation. The source could be local/remote/distant memory or another core's cache" + }, + {, + "EventCode": "0x2608A", + "EventName": "PM_ISIDE_DISP_FAIL_ADDR", + "BriefDescription": "All I-side dispatch attempts for this thread that failed due to a addr collision with another machine (excludes i_l2mru_tch_reqs)" + }, + {, + "EventCode": "0x50B4", + "EventName": "PM_TAGE_CORRECT_TAKEN_CMPL", + "BriefDescription": "The TAGE overrode BHT direction prediction and it was correct. Counted at completion for taken branches only" + }, + {, + "EventCode": "0x2090", + "EventName": "PM_DISP_CLB_HELD_SB", + "BriefDescription": "Dispatch/CLB Hold: Scoreboard" + }, + {, + "EventCode": "0xE0B0", + "EventName": "PM_TM_FAIL_NON_TX_CONFLICT", + "BriefDescription": "Non transactional conflict from LSU, gets reported to TEXASR" + }, + {, + "EventCode": "0xD198", + "EventName": "PM_MRK_LSU_FLUSH_ATOMIC", + "BriefDescription": "Quad-word loads (lq) are considered atomic because they always span at least 2 slices. If a snoop or store from another thread changes the data the load is accessing between the 2 or 3 pieces of the lq instruction, the lq will be flushed" + }, + {, + "EventCode": "0x201E0", + "EventName": "PM_MRK_DATA_FROM_MEMORY", + "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load" + }, + {, + "EventCode": "0x368A2", + "EventName": "PM_L3_L2_CO_MISS", + "BriefDescription": "L2 CO miss" + }, + {, + "EventCode": "0x3608C", + "EventName": "PM_CO0_BUSY", + "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)" + }, + {, + "EventCode": "0x4608C", + "EventName": "PM_CO0_BUSY", + "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)" + }, + {, + "EventCode": "0x2C122", + "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load" + }, + {, + "EventCode": "0x35154", + "EventName": "PM_MRK_DATA_FROM_L3_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L3 due to a marked load" + }, + {, + "EventCode": "0x1D140", + "EventName": "PM_MRK_DATA_FROM_L31_MOD_CYC", + "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L3 on the same chip due to a marked load" + }, + {, + "EventCode": "0x4404A", + "EventName": "PM_INST_FROM_OFF_CHIP_CACHE", + "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x28AC", + "EventName": "PM_TM_FAIL_SELF", + "BriefDescription": "TM aborted because a self-induced conflict occurred in Suspended state, due to one of the following: a store to a storage location that was previously accessed transactionally; a dcbf, dcbi, or icbi specify- ing a block that was previously accessed transactionally; a dcbst specifying a block that was previously written transactionally; or a tlbie that specifies a translation that was pre- viously used transactionally" + }, + {, + "EventCode": "0x45056", + "EventName": "PM_SCALAR_FLOP_CMPL", + "BriefDescription": "Scalar flop operation completed" + }, + {, + "EventCode": "0x16092", + "EventName": "PM_L2_LD_MISS_128B", + "BriefDescription": "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 128B (i.e., M=0)" + }, + {, + "EventCode": "0x2E014", + "EventName": "PM_STCX_FIN", + "BriefDescription": "Number of stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed" + }, + {, + "EventCode": "0xE0B8", + "EventName": "PM_LS2_TM_DISALLOW", + "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it" + }, + {, + "EventCode": "0x2094", + "EventName": "PM_TM_OUTER_TBEGIN", + "BriefDescription": "Completion time outer tbegin" + }, + {, + "EventCode": "0x160B4", + "EventName": "PM_L3_P0_LCO_RTY", + "BriefDescription": "L3 initiated LCO received retry on port 0 (can try 4 times)" + }, + {, + "EventCode": "0x36892", + "EventName": "PM_DSIDE_OTHER_64B_L2MEMACC", + "BriefDescription": "Valid when first beat of data comes in for an D-side fetch where data came EXCLUSIVELY from memory that was for hpc_read64, (RC had to fetch other 64B of a line from MC) i.e., number of times RC had to go to memory to get 'missing' 64B" + }, + {, + "EventCode": "0x20A8", + "EventName": "PM_TM_FAIL_FOOTPRINT_OVERFLOW", + "BriefDescription": "TM aborted because the tracking limit for transactional storage accesses was exceeded.. Asynchronous" + }, + {, + "EventCode": "0x30018", + "EventName": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL", + "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)" + }, + {, + "EventCode": "0xC894", + "EventName": "PM_LS1_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, + "EventCode": "0x360A2", + "EventName": "PM_L3_L2_CO_HIT", + "BriefDescription": "L2 CO hits" + }, + {, + "EventCode": "0x36092", + "EventName": "PM_DSIDE_L2MEMACC", + "BriefDescription": "Valid when first beat of data comes in for an D-side fetch where data came EXCLUSIVELY from memory (excluding hpcread64 accesses), i.e., total memory accesses by RCs" + }, + {, + "EventCode": "0x10138", + "EventName": "PM_MRK_BR_2PATH", + "BriefDescription": "marked branches which are not strongly biased" + }, + {, + "EventCode": "0x2884", + "EventName": "PM_ISYNC", + "BriefDescription": "Isync completion count per thread" + }, + {, + "EventCode": "0x16882", + "EventName": "PM_L2_CASTOUT_SHR", + "BriefDescription": "L2 Castouts - Shared (Tx,Sx)" + }, + {, + "EventCode": "0xD884", + "EventName": "PM_LSU3_SET_MPRED", + "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" + }, + {, + "EventCode": "0x26092", + "EventName": "PM_L2_LD_MISS_64B", + "BriefDescription": "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B(i.e., M=1)" + }, + {, + "EventCode": "0x26080", + "EventName": "PM_L2_LD_MISS", + "BriefDescription": "All successful D-Side Load dispatches that were an L2 miss for this thread" + }, + {, + "EventCode": "0x3D14C", + "EventName": "PM_MRK_DATA_FROM_DMEM", + "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a marked load" + }, + {, + "EventCode": "0x100FA", + "EventName": "PM_ANY_THRD_RUN_CYC", + "BriefDescription": "Cycles in which at least one thread has the run latch set" + }, + {, + "EventCode": "0x2C12A", + "EventName": "PM_MRK_DATA_FROM_RMEM_CYC", + "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group ( Remote) due to a marked load" + }, + {, + "EventCode": "0x25048", + "EventName": "PM_IPTEG_FROM_LMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request" + }, + {, + "EventCode": "0x40006", + "EventName": "PM_ISLB_MISS", + "BriefDescription": "Number of ISLB misses for this thread" + }, + {, + "EventCode": "0xD8A8", + "EventName": "PM_ISLB_MISS", + "BriefDescription": "Instruction SLB miss - Total of all segment sizes" + }, + {, + "EventCode": "0xD19C", + "EventName": "PM_MRK_LSU_FLUSH_RELAUNCH_MISS", + "BriefDescription": "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent" + }, + {, + "EventCode": "0x260A2", + "EventName": "PM_L3_CI_HIT", + "BriefDescription": "L3 Castins Hit (total count)" + }, + {, + "EventCode": "0x44054", + "EventName": "PM_VECTOR_LD_CMPL", + "BriefDescription": "Number of vector load instructions completed" + }, + {, + "EventCode": "0x1E05C", + "EventName": "PM_CMPLU_STALL_NESTED_TBEGIN", + "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT" + }, + {, + "EventCode": "0x1608E", + "EventName": "PM_ST_CAUSED_FAIL", + "BriefDescription": "Non-TM Store caused any thread to fail" + }, + {, + "EventCode": "0x3080", + "EventName": "PM_ISU0_ISS_HOLD_ALL", + "BriefDescription": "All ISU rejects" + }, + {, + "EventCode": "0x1515A", + "EventName": "PM_SYNC_MRK_L2MISS", + "BriefDescription": "Marked L2 Miss that can throw a synchronous interrupt" + }, + {, + "EventCode": "0x26892", + "EventName": "PM_L2_ST_MISS_64B", + "BriefDescription": "All successful D-side store dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B (i.e., M=1)" + }, + {, + "EventCode": "0x2688C", + "EventName": "PM_CO_USAGE", + "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running" + }, + {, + "EventCode": "0xD084", + "EventName": "PM_LSU2_SET_MPRED", + "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" + }, + {, + "EventCode": "0x48B8", + "EventName": "PM_BR_MPRED_TAKEN_TA", + "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Target Address Prediction from the Count Cache or Link Stack. Only XL-form branches that resolved Taken set this event." + }, + {, + "EventCode": "0x50B0", + "EventName": "PM_BTAC_BAD_RESULT", + "BriefDescription": "BTAC thinks branch will be taken but it is either predicted not-taken by the BHT, or the target address is wrong (less common). In both cases, a redirect will happen" + }, + {, + "EventCode": "0xD888", + "EventName": "PM_LSU1_LDMX_FIN", + "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])." + }, + {, + "EventCode": "0x58B4", + "EventName": "PM_TAGE_CORRECT", + "BriefDescription": "The TAGE overrode BHT direction prediction and it was correct. Includes taken and not taken and is counted at execution time" + }, + {, + "EventCode": "0x3688C", + "EventName": "PM_SN_USAGE", + "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each SN machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running" + }, + {, + "EventCode": "0x46084", + "EventName": "PM_L2_RCST_DISP_FAIL_OTHER", + "BriefDescription": "All D-side store dispatch attempts for this thread that failed due to reason other than address collision" + }, + {, + "EventCode": "0xF0AC", + "EventName": "PM_DC_PREF_STRIDED_CONF", + "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software." + }, + {, + "EventCode": "0x45054", + "EventName": "PM_FMA_CMPL", + "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. " + }, + {, + "EventCode": "0x5090", + "EventName": "PM_SHL_ST_DISABLE", + "BriefDescription": "Store-Hit-Load Table Read Hit with entry Disabled (entry was disabled due to the entry shown to not prevent the flush)" + }, + {, + "EventCode": "0x201E8", + "EventName": "PM_THRESH_EXC_512", + "BriefDescription": "Threshold counter exceeded a value of 512" + }, + {, + "EventCode": "0x5084", + "EventName": "PM_DECODE_FUSION_EXT_ADD", + "BriefDescription": "32-bit extended addition" + }, + {, + "EventCode": "0x36080", + "EventName": "PM_L2_INST", + "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)." + }, + {, + "EventCode": "0x3609E", + "EventName": "PM_L2_INST", + "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" + }, + {, + "EventCode": "0x3504C", + "EventName": "PM_IPTEG_FROM_DL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request" + }, + {, + "EventCode": "0xD890", + "EventName": "PM_LS1_DC_COLLISIONS", + "BriefDescription": "Read-write data cache collisions" + }, + {, + "EventCode": "0x1688A", + "EventName": "PM_ISIDE_DISP", + "BriefDescription": "All I-side dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" + }, + {, + "EventCode": "0x468AA", + "EventName": "PM_L3_P1_CO_L31", + "BriefDescription": "L3 CO to L3.1 (LCO) port 1 with or without data" + }, + {, + "EventCode": "0x28B0", + "EventName": "PM_DISP_HELD_TBEGIN", + "BriefDescription": "This outer tbegin transaction cannot be dispatched until the previous tend instruction completes" + }, + {, + "EventCode": "0xE8A0", + "EventName": "PM_LSU3_TM_L1_MISS", + "BriefDescription": "Load tm L1 miss" + }, + {, + "EventCode": "0x2C05E", + "EventName": "PM_INST_GRP_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for an instruction fetch (demand only)" + }, + {, + "EventCode": "0xC8BC", + "EventName": "PM_STCX_SUCCESS_CMPL", + "BriefDescription": "Number of stcx instructions that completed successfully" + }, + {, + "EventCode": "0xE098", + "EventName": "PM_LSU2_TM_L1_HIT", + "BriefDescription": "Load tm hit in L1" + }, + {, + "EventCode": "0x44044", + "EventName": "PM_INST_FROM_L31_ECO_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x16886", + "EventName": "PM_CO_DISP_FAIL", + "BriefDescription": "CO dispatch failed due to all CO machines being busy" + }, + {, + "EventCode": "0x3D146", + "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a marked load" + }, + {, + "EventCode": "0x16892", + "EventName": "PM_L2_ST_MISS_128B", + "BriefDescription": "All successful D-side store dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 128B (i.e., M=0)" + }, + {, + "EventCode": "0x26890", + "EventName": "PM_ISIDE_L2MEMACC", + "BriefDescription": "Valid when first beat of data comes in for an I-side fetch where data came from memory" + }, + {, + "EventCode": "0xD094", + "EventName": "PM_LS2_DC_COLLISIONS", + "BriefDescription": "Read-write data cache collisions" + }, + {, + "EventCode": "0x3C05E", + "EventName": "PM_MEM_RWITM", + "BriefDescription": "Memory Read With Intent to Modify for this thread" + }, + {, + "EventCode": "0x26882", + "EventName": "PM_L2_DC_INV", + "BriefDescription": "D-cache invalidates sent over the reload bus to the core" + }, + {, + "EventCode": "0xC090", + "EventName": "PM_LSU_STCX", + "BriefDescription": "STCX sent to nest, i.e. total" + }, + {, + "EventCode": "0xD080", + "EventName": "PM_LSU0_SET_MPRED", + "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" + }, + {, + "EventCode": "0x2C120", + "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a marked load" + }, + {, + "EventCode": "0x36086", + "EventName": "PM_L2_RC_ST_DONE", + "BriefDescription": "RC did store to line that was Tx or Sx" + }, + {, + "EventCode": "0xE8AC", + "EventName": "PM_TM_FAIL_TX_CONFLICT", + "BriefDescription": "Transactional conflict from LSU, gets reported to TEXASR" + }, + {, + "EventCode": "0x48A8", + "EventName": "PM_DECODE_FUSION_LD_ST_DISP", + "BriefDescription": "32-bit displacement D-form and 16-bit displacement X-form" + }, + {, + "EventCode": "0x3D144", + "EventName": "PM_MRK_DATA_FROM_L2_MEPF_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load" + }, + {, + "EventCode": "0x44046", + "EventName": "PM_INST_FROM_L21_MOD", + "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x40B0", + "EventName": "PM_BR_PRED_TAKEN_CR", + "BriefDescription": "Conditional Branch that had its direction predicted. I-form branches do not set this event. In addition, B-form branches which do not use the BHT do not set this event - these are branches with BO-field set to 'always taken' and branches" + }, + {, + "EventCode": "0x15040", + "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request" + }, + {, + "EventCode": "0xD9A0", + "EventName": "PM_MRK_LSU_FLUSH_LHL_SHL", + "BriefDescription": "The instruction was flushed because of a sequential load/store consistency. If a load or store hits on an older load that has either been snooped (for loads) or has stale data (for stores)." + }, + {, + "EventCode": "0x35042", + "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a instruction side request" + }, + {, + "EventCode": "0xF898", + "EventName": "PM_XLATE_RADIX_MODE", + "BriefDescription": "LSU reports every cycle the thread is in radix translation mode (as opposed to HPT mode)" + }, + {, + "EventCode": "0x2D142", + "EventName": "PM_MRK_DATA_FROM_L3_MEPF", + "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load" + }, + {, + "EventCode": "0x160B0", + "EventName": "PM_L3_P0_NODE_PUMP", + "BriefDescription": "L3 PF sent with nodal scope port 0, counts even retried requests" + }, + {, + "EventCode": "0xD88C", + "EventName": "PM_LSU3_LDMX_FIN", + "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])." + }, + {, + "EventCode": "0x36882", + "EventName": "PM_L2_LD_HIT", + "BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)" + }, + {, + "EventCode": "0x2609E", + "EventName": "PM_L2_LD_HIT", + "BriefDescription": "All successful D side load dispatches for this thread that were L2 hits for this thread" + }, + {, + "EventCode": "0x168AC", + "EventName": "PM_L3_CI_USAGE", + "BriefDescription": "Rotating sample of 16 CI or CO actives" + }, + {, + "EventCode": "0x20134", + "EventName": "PM_MRK_FXU_FIN", + "BriefDescription": "fxu marked instr finish" + }, + {, + "EventCode": "0x4608E", + "EventName": "PM_TM_CAP_OVERFLOW", + "BriefDescription": "TM Footprint Capacity Overflow" + }, + {, + "EventCode": "0x4F05C", + "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from beyond the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation. The source could be local/remote/distant memory or another core's cache" + }, + {, + "EventCode": "0x40014", + "EventName": "PM_PROBE_NOP_DISP", + "BriefDescription": "ProbeNops dispatched" + }, + {, + "EventCode": "0x58A8", + "EventName": "PM_DECODE_HOLD_ICT_FULL", + "BriefDescription": "Counts the number of cycles in which the IFU was not able to decode and transmit one or more instructions because all itags were in use. This means the ICT is full for this thread" + }, + {, + "EventCode": "0x10052", + "EventName": "PM_GRP_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)" + }, + {, + "EventCode": "0x2505E", + "EventName": "PM_BACK_BR_CMPL", + "BriefDescription": "Branch instruction completed with a target address less than current instruction address" + }, + {, + "EventCode": "0x2688A", + "EventName": "PM_ISIDE_DISP_FAIL_OTHER", + "BriefDescription": "All I-side dispatch attempts for this thread that failed due to a reason other than addrs collision (excludes i_l2mru_tch_reqs)" + }, + {, + "EventCode": "0x2001A", + "EventName": "PM_NTC_ALL_FIN", + "BriefDescription": "Cycles after all instructions have finished to group completed" + }, + {, + "EventCode": "0x3005A", + "EventName": "PM_ISQ_0_8_ENTRIES", + "BriefDescription": "Cycles in which 8 or less Issue Queue entries are in use. This is a shared event, not per thread" + }, + {, + "EventCode": "0x3515E", + "EventName": "PM_MRK_BACK_BR_CMPL", + "BriefDescription": "Marked branch instruction completed with a target address less than current instruction address" + }, + {, + "EventCode": "0xF890", + "EventName": "PM_LSU1_L1_CAM_CANCEL", + "BriefDescription": "ls1 l1 tm cam cancel" + }, + {, + "EventCode": "0xE884", + "EventName": "PM_LS1_ERAT_MISS_PREF", + "BriefDescription": "LS1 Erat miss due to prefetch" + }, + {, + "EventCode": "0xE89C", + "EventName": "PM_LSU1_TM_L1_MISS", + "BriefDescription": "Load tm L1 miss" + }, + {, + "EventCode": "0x28A8", + "EventName": "PM_TM_FAIL_CONF_NON_TM", + "BriefDescription": "TM aborted because a conflict occurred with a non-transactional access by another processor" + }, + {, + "EventCode": "0x16890", + "EventName": "PM_L1PF_L2MEMACC", + "BriefDescription": "Valid when first beat of data comes in for an L1PF where data came from memory" + }, + {, + "EventCode": "0x4504C", + "EventName": "PM_IPTEG_FROM_DMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a instruction side request" + }, + {, + "EventCode": "0x1002E", + "EventName": "PM_LMQ_MERGE", + "BriefDescription": "A demand miss collides with a prefetch for the same line" + }, + {, + "EventCode": "0x160B6", + "EventName": "PM_L3_WI0_BUSY", + "BriefDescription": "Rotating sample of 8 WI valid" + }, + {, + "EventCode": "0x260B6", + "EventName": "PM_L3_WI0_BUSY", + "BriefDescription": "Rotating sample of 8 WI valid (duplicate)" + }, + {, + "EventCode": "0x368AC", + "EventName": "PM_L3_CO0_BUSY", + "BriefDescription": "Lifetime, sample of CO machine 0 valid" + }, + {, + "EventCode": "0x468AC", + "EventName": "PM_L3_CO0_BUSY", + "BriefDescription": "Lifetime, sample of CO machine 0 valid" + }, + {, + "EventCode": "0x2E040", + "EventName": "PM_DPTEG_FROM_L2_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x1D152", + "EventName": "PM_MRK_DATA_FROM_DL4", + "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a marked load" + }, + {, + "EventCode": "0x46880", + "EventName": "PM_ISIDE_MRU_TOUCH", + "BriefDescription": "I-side L2 MRU touch sent to L2 for this thread" + }, + {, + "EventCode": "0x1C05C", + "EventName": "PM_DTLB_MISS_2M", + "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used" + }, + {, + "EventCode": "0x50B8", + "EventName": "PM_TAGE_OVERRIDE_WRONG", + "BriefDescription": "The TAGE overrode BHT direction prediction but it was incorrect. Counted at completion for taken branches only" + }, + {, + "EventCode": "0x160AE", + "EventName": "PM_L3_P0_PF_RTY", + "BriefDescription": "L3 PF received retry port 0, every retry counted" + }, + {, + "EventCode": "0x260AE", + "EventName": "PM_L3_P0_PF_RTY", + "BriefDescription": "L3 PF received retry port 0, every retry counted" + }, + {, + "EventCode": "0x268B2", + "EventName": "PM_L3_LOC_GUESS_WRONG", + "BriefDescription": "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low" + }, + {, + "EventCode": "0x36088", + "EventName": "PM_L2_SYS_GUESS_CORRECT", + "BriefDescription": "L2 guess system (VGS or RNS) and guess was correct (ie data beyond-group)" + }, + {, + "EventCode": "0x589C", + "EventName": "PM_PTESYNC", + "BriefDescription": "ptesync instruction counted when the instruction is decoded and transmitted" + }, + {, + "EventCode": "0x26086", + "EventName": "PM_CO_TM_SC_FOOTPRINT", + "BriefDescription": "L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3) OR L2 TM_store hit dirty HPC line and L3 indicated SC line formed in L3 on RDR bus" + }, + {, + "EventCode": "0x1E05A", + "EventName": "PM_CMPLU_STALL_ANY_SYNC", + "BriefDescription": "Cycles in which the NTC sync instruction (isync, lwsync or hwsync) is not allowed to complete" + }, + {, + "EventCode": "0xF090", + "EventName": "PM_LSU0_L1_CAM_CANCEL", + "BriefDescription": "ls0 l1 tm cam cancel" + }, + {, + "EventCode": "0xC0A8", + "EventName": "PM_LSU_FLUSH_CI", + "BriefDescription": "Load was not issued to LSU as a cache inhibited (non-cacheable) load but it was later determined to be cache inhibited" + }, + {, + "EventCode": "0x20AC", + "EventName": "PM_TM_FAIL_CONF_TM", + "BriefDescription": "TM aborted because a conflict occurred with another transaction." + }, + {, + "EventCode": "0x588C", + "EventName": "PM_SHL_ST_DEP_CREATED", + "BriefDescription": "Store-Hit-Load Table Read Hit with entry Enabled" + }, + {, + "EventCode": "0x360AC", + "EventName": "PM_L3_SN0_BUSY", + "BriefDescription": "Lifetime, sample of snooper machine 0 valid" + }, + {, + "EventCode": "0x460AC", + "EventName": "PM_L3_SN0_BUSY", + "BriefDescription": "Lifetime, sample of snooper machine 0 valid" + }, + {, + "EventCode": "0x3005C", + "EventName": "PM_BFU_BUSY", + "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity" + }, + {, + "EventCode": "0x48A0", + "EventName": "PM_BR_PRED_PCACHE", + "BriefDescription": "Conditional branch completed that used pattern cache prediction" + }, + {, + "EventCode": "0x26880", + "EventName": "PM_L2_ST_MISS", + "BriefDescription": "All successful D-Side Store dispatches that were an L2 miss for this thread" + }, + {, + "EventCode": "0xF8B4", + "EventName": "PM_DC_PREF_XCONS_ALLOC", + "BriefDescription": "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch" + }, + {, + "EventCode": "0x35048", + "EventName": "PM_IPTEG_FROM_DL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request" + }, + {, + "EventCode": "0x260A8", + "EventName": "PM_L3_PF_HIT_L3", + "BriefDescription": "L3 PF hit in L3 (abandoned)" + }, + {, + "EventCode": "0x360B4", + "EventName": "PM_L3_PF0_BUSY", + "BriefDescription": "Lifetime, sample of PF machine 0 valid" + }, + {, + "EventCode": "0x460B4", + "EventName": "PM_L3_PF0_BUSY", + "BriefDescription": "Lifetime, sample of PF machine 0 valid" + }, + {, + "EventCode": "0xC0B0", + "EventName": "PM_LSU_FLUSH_UE", + "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time" + }, + {, + "EventCode": "0x4013A", + "EventName": "PM_MRK_IC_MISS", + "BriefDescription": "Marked instruction experienced I cache miss" + }, + {, + "EventCode": "0x2088", + "EventName": "PM_FLUSH_DISP_SB", + "BriefDescription": "Dispatch Flush: Scoreboard" + }, + {, + "EventCode": "0x401E8", + "EventName": "PM_MRK_DATA_FROM_L2MISS", + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a marked load" + }, + {, + "EventCode": "0x3688E", + "EventName": "PM_TM_ST_CAUSED_FAIL", + "BriefDescription": "TM Store (fav or non-fav) caused another thread to fail" + }, + {, + "EventCode": "0x460B2", + "EventName": "PM_L3_SYS_GUESS_WRONG", + "BriefDescription": "Initial scope=system (VGS or RNS) but data from local or near. Prediction too high" + }, + {, + "EventCode": "0x58B8", + "EventName": "PM_TAGE_OVERRIDE_WRONG_SPEC", + "BriefDescription": "The TAGE overrode BHT direction prediction and it was correct. Includes taken and not taken and is counted at execution time" + }, + {, + "EventCode": "0xE890", + "EventName": "PM_LSU3_ERAT_HIT", + "BriefDescription": "Primary ERAT hit. There is no secondary ERAT" + }, + {, + "EventCode": "0x2898", + "EventName": "PM_TM_TABORT_TRECLAIM", + "BriefDescription": "Completion time tabortnoncd, tabortcd, treclaim" + }, + {, + "EventCode": "0x4C054", + "EventName": "PM_DERAT_MISS_16G", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G" + }, + {, + "EventCode": "0x268A0", + "EventName": "PM_L3_CO_L31", + "BriefDescription": "L3 CO to L3.1 OR of port 0 and 1 (lossy = may undercount if two cresps come in the same cyc)" + }, + {, + "EventCode": "0x5080", + "EventName": "PM_THRD_PRIO_4_5_CYC", + "BriefDescription": "Cycles thread running at priority level 4 or 5" + }, + {, + "EventCode": "0x2505C", + "EventName": "PM_VSU_FIN", + "BriefDescription": "VSU instruction finished. Up to 4 per cycle" + }, + {, + "EventCode": "0x40A4", + "EventName": "PM_BR_PRED_CCACHE", + "BriefDescription": "Conditional Branch Completed that used the Count Cache for Target Prediction" + }, + {, + "EventCode": "0x2E04A", + "EventName": "PM_DPTEG_FROM_RL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4D12E", + "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC", + "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load" + }, + {, + "EventCode": "0xC8B4", + "EventName": "PM_LSU_FLUSH_LHL_SHL", + "BriefDescription": "The instruction was flushed because of a sequential load/store consistency. If a load or store hits on an older load that has either been snooped (for loads) or has stale data (for stores)." + }, + {, + "EventCode": "0x58A4", + "EventName": "PM_FLUSH_LSU", + "BriefDescription": "LSU flushes. Includes all lsu flushes" + }, + {, + "EventCode": "0x1D150", + "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load" + }, + {, + "EventCode": "0xC8A0", + "EventName": "PM_LSU1_FALSE_LHS", + "BriefDescription": "False LHS match detected" + }, + {, + "EventCode": "0x48BC", + "EventName": "PM_THRD_PRIO_2_3_CYC", + "BriefDescription": "Cycles thread running at priority level 2 or 3" + }, + {, + "EventCode": "0x10134", + "EventName": "PM_MRK_ST_DONE_L2", + "BriefDescription": "marked store completed in L2 ( RC machine done)" + }, + {, + "EventCode": "0x368B2", + "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH", + "BriefDescription": "Initial scope=group (GS or NNS) but data from local node. Prediction too high" + }, + {, + "EventCode": "0xE8BC", + "EventName": "PM_LS1_PTE_TABLEWALK_CYC", + "BriefDescription": "Cycles when a tablewalk is pending on this thread on table 1" + }, + {, + "EventCode": "0x1F152", + "EventName": "PM_MRK_FAB_RSP_BKILL_CYC", + "BriefDescription": "cycles L2 RC took for a bkill" + }, + {, + "EventCode": "0x4C124", + "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC", + "BriefDescription": "Duration in cycles to reload from local core's L3 without conflict due to a marked load" + }, + {, + "EventCode": "0x2F14A", + "EventName": "PM_MRK_DPTEG_FROM_RL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x26888", + "EventName": "PM_L2_GRP_GUESS_WRONG", + "BriefDescription": "L2 guess grp (GS or NNS) and guess was not correct (ie data on-chip OR beyond-group)" + }, + {, + "EventCode": "0x368AE", + "EventName": "PM_L3_P1_CO_RTY", + "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" + }, + {, + "EventCode": "0x468AE", + "EventName": "PM_L3_P1_CO_RTY", + "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted" + }, + {, + "EventCode": "0xC0AC", + "EventName": "PM_LSU_FLUSH_EMSH", + "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address" + }, + {, + "EventCode": "0x260B2", + "EventName": "PM_L3_SYS_GUESS_CORRECT", + "BriefDescription": "Initial scope=system (VGS or RNS) and data from outside group (far or rem)(pred successful)" + }, + {, + "EventCode": "0x1D146", + "EventName": "PM_MRK_DATA_FROM_MEMORY_CYC", + "BriefDescription": "Duration in cycles to reload from a memory location including L4 from local remote or distant due to a marked load" + }, + {, + "EventCode": "0xE094", + "EventName": "PM_LSU0_TM_L1_HIT", + "BriefDescription": "Load tm hit in L1" + }, + {, + "EventCode": "0x46888", + "EventName": "PM_L2_GROUP_PUMP", + "BriefDescription": "RC requests that were on group (aka nodel) pump attempts" + }, + {, + "EventCode": "0xF0B0", + "EventName": "PM_L3_LD_PREF", + "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest" + }, + {, + "EventCode": "0x16080", + "EventName": "PM_L2_LD", + "BriefDescription": "All successful D-side Load dispatches for this thread (L2 miss + L2 hits)" + }, + {, + "EventCode": "0x4505C", + "EventName": "PM_MATH_FLOP_CMPL", + "BriefDescription": "Math flop instruction completed" + }, + {, + "EventCode": "0x368B0", + "EventName": "PM_L3_P1_SYS_PUMP", + "BriefDescription": "L3 PF sent with sys scope port 1, counts even retried requests" + }, + {, + "EventCode": "0x1F146", + "EventName": "PM_MRK_DPTEG_FROM_L31_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x2000C", + "EventName": "PM_THRD_ALL_RUN_CYC", + "BriefDescription": "Cycles in which all the threads have the run latch set" + }, + {, + "EventCode": "0xC0BC", + "EventName": "PM_LSU_FLUSH_OTHER", + "BriefDescription": "Other LSU flushes including: Sync (sync ack from L2 caused search of LRQ for oldest snooped load, This will either signal a Precise Flush of the oldest snooped loa or a Flush Next PPC); Data Valid Flush Next (several cases of this, one example is store and reload are lined up such that a store-hit-reload scenario exists and the CDF has already launched and has gotten bad/stale data); Bad Data Valid Flush Next (might be a few cases of this, one example is a larxa (D$ hit) return data and dval but can't allocate to LMQ (LMQ full or other reason). Already gave dval but can't watch it for snoop_hit_larx. Need to take the “bad dval” back and flush all younger ops)" + }, + {, + "EventCode": "0x5094", + "EventName": "PM_IC_MISS_ICBI", + "BriefDescription": "threaded version, IC Misses where we got EA dir hit but no sector valids were on. ICBI took line out" + }, + {, + "EventCode": "0xC8A8", + "EventName": "PM_LSU_FLUSH_ATOMIC", + "BriefDescription": "Quad-word loads (lq) are considered atomic because they always span at least 2 slices. If a snoop or store from another thread changes the data the load is accessing between the 2 or 3 pieces of the lq instruction, the lq will be flushed" + }, + {, + "EventCode": "0x1E04E", + "EventName": "PM_DPTEG_FROM_L2MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4D05E", + "EventName": "PM_BR_CMPL", + "BriefDescription": "Any Branch instruction completed" + }, + {, + "EventCode": "0x260B0", + "EventName": "PM_L3_P0_GRP_PUMP", + "BriefDescription": "L3 PF sent with grp scope port 0, counts even retried requests" + }, + {, + "EventCode": "0x30132", + "EventName": "PM_MRK_VSU_FIN", + "BriefDescription": "VSU marked instr finish" + }, + {, + "EventCode": "0x2D120", + "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE", + "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load" + }, + {, + "EventCode": "0x1E048", + "EventName": "PM_DPTEG_FROM_ON_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x16086", + "EventName": "PM_L2_SN_M_WR_DONE", + "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)" + }, + {, + "EventCode": "0x46886", + "EventName": "PM_L2_SN_M_WR_DONE", + "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)" + }, + {, + "EventCode": "0x489C", + "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL", + "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time" + }, + {, + "EventCode": "0xF0B8", + "EventName": "PM_LS0_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, + "EventCode": "0x20132", + "EventName": "PM_MRK_DFU_FIN", + "BriefDescription": "Decimal Unit marked Instruction Finish" + }, + {, + "EventCode": "0x160A6", + "EventName": "PM_TM_SC_CO", + "BriefDescription": "L3 castout TM SC line" + }, + {, + "EventCode": "0xC8B0", + "EventName": "PM_LSU_FLUSH_LHS", + "BriefDescription": "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed" + }, + {, + "EventCode": "0x3F150", + "EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC", + "BriefDescription": "cycles to drain st from core to L2" + }, + {, + "EventCode": "0x168A4", + "EventName": "PM_L3_MISS", + "BriefDescription": "L3 Misses (L2 miss also missing L3, including data/instrn/xlate)" + }, + {, + "EventCode": "0xF080", + "EventName": "PM_LSU_STCX_FAIL", + "BriefDescription": "" + }, + {, + "EventCode": "0x30038", + "EventName": "PM_CMPLU_STALL_DMISS_LMEM", + "BriefDescription": "Completion stall due to cache miss that resolves in local memory" + }, + {, + "EventCode": "0x28A4", + "EventName": "PM_MRK_TEND_FAIL", + "BriefDescription": "Nested or not nested tend failed for a marked tend instruction" + }, + {, + "EventCode": "0x100FC", + "EventName": "PM_LD_REF_L1", + "BriefDescription": "All L1 D cache load references counted at finish, gated by reject" + }, + {, + "EventCode": "0xC0A0", + "EventName": "PM_LSU0_FALSE_LHS", + "BriefDescription": "False LHS match detected" + }, + {, + "EventCode": "0x468A8", + "EventName": "PM_SN_MISS", + "BriefDescription": "Any port snooper L3 miss or collision. Up to 4 can happen in a cycle but we only count 1" + }, + {, + "EventCode": "0x36888", + "EventName": "PM_L2_SYS_GUESS_WRONG", + "BriefDescription": "L2 guess system (VGS or RNS) and guess was not correct (ie data ^beyond-group)" + }, + {, + "EventCode": "0x2080", + "EventName": "PM_EE_OFF_EXT_INT", + "BriefDescription": "CyclesMSR[EE] is off and external interrupts are active" + }, + {, + "EventCode": "0xE8B8", + "EventName": "PM_LS3_TM_DISALLOW", + "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it" + }, + {, + "EventCode": "0x2688E", + "EventName": "PM_TM_FAV_CAUSED_FAIL", + "BriefDescription": "TM Load (fav) caused another thread to fail" + }, + {, + "EventCode": "0x16090", + "EventName": "PM_SN0_BUSY", + "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)" + }, + {, + "EventCode": "0x26090", + "EventName": "PM_SN0_BUSY", + "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)" + }, + {, + "EventCode": "0x360AE", + "EventName": "PM_L3_P0_CO_RTY", + "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted" + }, + {, + "EventCode": "0x460AE", + "EventName": "PM_L3_P0_CO_RTY", + "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted" + }, + {, + "EventCode": "0x168A8", + "EventName": "PM_L3_WI_USAGE", + "BriefDescription": "Lifetime, sample of Write Inject machine 0 valid" + }, + {, + "EventCode": "0x468A2", + "EventName": "PM_L3_LAT_CI_MISS", + "BriefDescription": "L3 Lateral Castins Miss" + }, + {, + "EventCode": "0x4090", + "EventName": "PM_IC_PREF_CANCEL_PAGE", + "BriefDescription": "Prefetch Canceled due to page boundary" + }, + {, + "EventCode": "0xF09C", + "EventName": "PM_SLB_TABLEWALK_CYC", + "BriefDescription": "Cycles when a tablewalk is pending on this thread on the SLB table" + }, + {, + "EventCode": "0x460AA", + "EventName": "PM_L3_P0_CO_L31", + "BriefDescription": "L3 CO to L3.1 (LCO) port 0 with or without data" + }, + {, + "EventCode": "0x2880", + "EventName": "PM_FLUSH_DISP", + "BriefDescription": "Dispatch flush" + }, + {, + "EventCode": "0x168AE", + "EventName": "PM_L3_P1_PF_RTY", + "BriefDescription": "L3 PF received retry port 1, every retry counted" + }, + {, + "EventCode": "0x268AE", + "EventName": "PM_L3_P1_PF_RTY", + "BriefDescription": "L3 PF received retry port 3, every retry counted" + }, + {, + "EventCode": "0x46082", + "EventName": "PM_L2_ST_DISP", + "BriefDescription": "All successful D-side store dispatches for this thread " + }, + {, + "EventCode": "0x1689E", + "EventName": "PM_L2_ST_DISP", + "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)" + }, + {, + "EventCode": "0x36880", + "EventName": "PM_L2_INST_MISS", + "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" + }, + {, + "EventCode": "0x4609E", + "EventName": "PM_L2_INST_MISS", + "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" + }, + {, + "EventCode": "0xE084", + "EventName": "PM_LS0_ERAT_MISS_PREF", + "BriefDescription": "LS0 Erat miss due to prefetch" + }, + {, + "EventCode": "0x409C", + "EventName": "PM_BR_PRED", + "BriefDescription": "Conditional Branch Executed in which the HW predicted the Direction or Target. Includes taken and not taken and is counted at execution time" + }, + {, + "EventCode": "0x2D144", + "EventName": "PM_MRK_DATA_FROM_L31_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a marked load" + }, + {, + "EventCode": "0x360A4", + "EventName": "PM_L3_CO_LCO", + "BriefDescription": "Total L3 COs occurred on LCO L3.1 (good cresp, may end up in mem on a retry)" + }, + {, + "EventCode": "0x4890", + "EventName": "PM_IC_PREF_CANCEL_HIT", + "BriefDescription": "Prefetch Canceled due to icache hit" + }, + {, + "EventCode": "0x268A8", + "EventName": "PM_RD_HIT_PF", + "BriefDescription": "RD machine hit L3 PF machine" + }, + {, + "EventCode": "0x16880", + "EventName": "PM_L2_ST", + "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)" + }, + {, + "EventCode": "0x4098", + "EventName": "PM_IC_DEMAND_L2_BHT_REDIRECT", + "BriefDescription": "L2 I cache demand request due to BHT redirect, branch redirect ( 2 bubbles 3 cycles)" + }, + {, + "EventCode": "0xD0B4", + "EventName": "PM_LSU0_SRQ_S0_VALID_CYC", + "BriefDescription": "Slot 0 of SRQ valid" + }, + {, + "EventCode": "0x160AA", + "EventName": "PM_L3_P0_LCO_NO_DATA", + "BriefDescription": "Dataless L3 LCO sent port 0" + }, + {, + "EventCode": "0x208C", + "EventName": "PM_CLB_HELD", + "BriefDescription": "CLB (control logic block - indicates quadword fetch block) Hold: Any Reason" + }, + {, + "EventCode": "0xF88C", + "EventName": "PM_LSU3_STORE_REJECT", + "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" + }, + {, + "EventCode": "0x200F2", + "EventName": "PM_INST_DISP", + "BriefDescription": "# PPC Dispatched" + }, + {, + "EventCode": "0x300F2", + "EventName": "PM_INST_DISP", + "BriefDescription": "# PPC Dispatched" + }, + {, + "EventCode": "0x4E05E", + "EventName": "PM_TM_OUTER_TBEGIN_DISP", + "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions" + }, + {, + "EventCode": "0x2D018", + "EventName": "PM_CMPLU_STALL_EXEC_UNIT", + "BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)" + }, + {, + "EventCode": "0x20B0", + "EventName": "PM_LSU_FLUSH_NEXT", + "BriefDescription": "LSU flush next reported at flush time. Sometimes these also come with an exception" + }, + {, + "EventCode": "0x3880", + "EventName": "PM_ISU2_ISS_HOLD_ALL", + "BriefDescription": "All ISU rejects" + }, + {, + "EventCode": "0x46882", + "EventName": "PM_L2_ST_HIT", + "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" + }, + {, + "EventCode": "0x2689E", + "EventName": "PM_L2_ST_HIT", + "BriefDescription": "All successful D-side store dispatches that were L2 hits for this thread" + }, + {, + "EventCode": "0x360A8", + "EventName": "PM_L3_CO", + "BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))" + }, + {, + "EventCode": "0x368A4", + "EventName": "PM_L3_CINJ", + "BriefDescription": "L3 castin of cache inject" + }, + {, + "EventCode": "0xC890", + "EventName": "PM_LSU_NCST", + "BriefDescription": "Asserts when a i=1 store op is sent to the nest. No record of issue pipe (LS0/LS1) is maintained so this is for both pipes. Probably don't need separate LS0 and LS1" + }, + {, + "EventCode": "0xD880", + "EventName": "PM_LSU1_SET_MPRED", + "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" + }, + {, + "EventCode": "0xD0B8", + "EventName": "PM_LSU_LMQ_FULL_CYC", + "BriefDescription": "Counts the number of cycles the LMQ is full" + }, + {, + "EventCode": "0x168B2", + "EventName": "PM_L3_GRP_GUESS_CORRECT", + "BriefDescription": "Initial scope=group (GS or NNS) and data from same group (near) (pred successful)" + }, + {, + "EventCode": "0x48A4", + "EventName": "PM_STOP_FETCH_PENDING_CYC", + "BriefDescription": "Fetching is stopped due to an incoming instruction that will result in a flush" + }, + {, + "EventCode": "0x36884", + "EventName": "PM_L2_RCST_DISP_FAIL_ADDR", + "BriefDescription": "All D-side store dispatch attempts for this thread that failed due to address collision with RC/CO/SN/SQ" + }, + {, + "EventCode": "0x260AC", + "EventName": "PM_L3_PF_USAGE", + "BriefDescription": "Rotating sample of 32 PF actives" } ] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json index 077c3527967f..47a82568a8df 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json @@ -1,680 +1,557 @@ [ {, - "EventCode": "0x1E", - "EventName": "PM_CYC", - "BriefDescription": "Cycles", - "PublicDescription": "" - }, - {, - "EventCode": "0x100F0", - "EventName": "PM_CYC", - "BriefDescription": "Cycles", - "PublicDescription": "" - }, - {, - "EventCode": "0x2", - "EventName": "PM_INST_CMPL", - "BriefDescription": "Number of PowerPC Instructions that completed.", - "PublicDescription": "" - }, - {, - "EventCode": "0x100FE", - "EventName": "PM_INST_CMPL", - "BriefDescription": "# PPC instructions completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x10006", - "EventName": "PM_DISP_HELD", - "BriefDescription": "Dispatch Held", - "PublicDescription": "" - }, - {, - "EventCode": "0x10016", - "EventName": "PM_DSLB_MISS", - "BriefDescription": "Data SLB Miss - Total of all segment sizes", - "PublicDescription": "" - }, - {, - "EventCode": "0x10018", - "EventName": "PM_IC_DEMAND_CYC", - "BriefDescription": "Icache miss demand cycles", - "PublicDescription": "" - }, - {, - "EventCode": "0x10022", - "EventName": "PM_PMC2_SAVED", - "BriefDescription": "PMC2 Rewind Value saved", - "PublicDescription": "" - }, - {, - "EventCode": "0x10024", - "EventName": "PM_PMC5_OVERFLOW", - "BriefDescription": "Overflow from counter 5", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D14A", - "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD", - "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E040", - "EventName": "PM_DPTEG_FROM_L2_NO_CONFLICT", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E04A", - "EventName": "PM_DPTEG_FROM_RL2L3_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F140", - "EventName": "PM_MRK_DPTEG_FROM_L2_NO_CONFLICT", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F142", - "EventName": "PM_MRK_DPTEG_FROM_L2", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F144", - "EventName": "PM_MRK_DPTEG_FROM_L3_NO_CONFLICT", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F148", - "EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE", - "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x10050", - "EventName": "PM_CHIP_PUMP_CPRED", - "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", - "PublicDescription": "" - }, - {, - "EventCode": "0x10054", - "EventName": "PM_PUMP_CPRED", - "BriefDescription": "Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", - "PublicDescription": "" - }, - {, - "EventCode": "0x10056", - "EventName": "PM_MEM_READ", - "BriefDescription": "Reads from Memory from this thread (includes data/inst/xlate/l1prefetch/inst prefetch). Includes L4", - "PublicDescription": "" - }, - {, - "EventCode": "0x1005A", - "EventName": "PM_CMPLU_STALL_DFLONG", - "BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Qualified by multicycle", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C056", - "EventName": "PM_DERAT_MISS_4K", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C05A", - "EventName": "PM_DERAT_MISS_2M", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation", - "PublicDescription": "" - }, - {, - "EventCode": "0x14050", - "EventName": "PM_INST_CHIP_PUMP_CPRED", - "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for an instruction fetch", - "PublicDescription": "" - }, - {, - "EventCode": "0x14052", - "EventName": "PM_INST_GRP_PUMP_MPRED_RTY", - "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D154", - "EventName": "PM_MRK_DATA_FROM_L2.1_SHR_CYC", - "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x15156", - "EventName": "PM_SYNC_MRK_FX_DIVIDE", - "BriefDescription": "Marked fixed point divide that can cause a synchronous interrupt", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E054", - "EventName": "PM_CMPLU_STALL", - "BriefDescription": "Nothing completed and ICT not empty", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F058", - "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L2", - "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L2 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation", - "PublicDescription": "" - }, - {, - "EventCode": "0x10062", - "EventName": "PM_LD_L3MISS_PEND_CYC", - "BriefDescription": "Cycles L3 miss was pending for this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x10064", - "EventName": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN", - "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", - "PublicDescription": "" - }, - {, - "EventCode": "0x10068", - "EventName": "PM_BRU_FIN", - "BriefDescription": "Branch Instruction Finished", - "PublicDescription": "" + "EventCode": "0x4D04C", + "EventName": "PM_DFU_BUSY", + "BriefDescription": "Cycles in which all 4 Decimal Floating Point units are busy. The DFU is running at capacity" }, {, "EventCode": "0x100F6", "EventName": "PM_IERAT_RELOAD", - "BriefDescription": "Number of I-ERAT reloads", - "PublicDescription": "" - }, - {, - "EventCode": "0x100F8", - "EventName": "PM_ICT_NOSLOT_CYC", - "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x20008", - "EventName": "PM_ICT_EMPTY_CYC", - "BriefDescription": "Cycles in which the ICT is completely empty. No itags are assigned to any thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x2000A", - "EventName": "PM_HV_CYC", - "BriefDescription": "Cycles in which msr_hv is high. Note that this event does not take msr_pr into consideration", - "PublicDescription": "" - }, - {, - "EventCode": "0x2000E", - "EventName": "PM_FXU_BUSY", - "BriefDescription": "Cycles in which all 4 FXUs are busy. The FXU is running at capacity", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C018", - "EventName": "PM_CMPLU_STALL_DMISS_L21_L31", - "BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D012", - "EventName": "PM_CMPLU_STALL_DFU", - "BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Not qualified by multicycle", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D01A", - "EventName": "PM_ICT_NOSLOT_IC_MISS", - "BriefDescription": "Ict empty for this thread due to Icache Miss", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E012", - "EventName": "PM_TM_TX_PASS_RUN_CYC", - "BriefDescription": "cycles spent in successful transactions", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E016", - "EventName": "PM_NTC_ISSUE_HELD_ARB", - "BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C046", - "EventName": "PM_DATA_FROM_RL2L3_MOD", - "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2404C", - "EventName": "PM_INST_FROM_MEMORY", - "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D14E", - "EventName": "PM_MRK_DATA_FROM_L2.1_SHR", - "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E042", - "EventName": "PM_DPTEG_FROM_L3_MEPF", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E046", - "EventName": "PM_DPTEG_FROM_RL2L3_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2F142", - "EventName": "PM_MRK_DPTEG_FROM_L3_MEPF", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2F144", - "EventName": "PM_MRK_DPTEG_FROM_L3.1_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2F146", - "EventName": "PM_MRK_DPTEG_FROM_RL2L3_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2F14C", - "EventName": "PM_MRK_DPTEG_FROM_MEMORY", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x20052", - "EventName": "PM_GRP_PUMP_MPRED", - "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C054", - "EventName": "PM_DERAT_MISS_64K", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C05A", - "EventName": "PM_DERAT_MISS_1G", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation", - "PublicDescription": "" - }, - {, - "EventCode": "0x24052", - "EventName": "PM_FXU_IDLE", - "BriefDescription": "Cycles in which FXU0, FXU1, FXU2, and FXU3 are all idle", - "PublicDescription": "" - }, - {, - "EventCode": "0x2405A", - "EventName": "PM_NTC_FIN", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. This event is used to account for cycles in which work is being completed in the CPI stack", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D150", - "EventName": "PM_MRK_DERAT_MISS_4K", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 4K", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D152", - "EventName": "PM_MRK_DERAT_MISS_2M", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation", - "PublicDescription": "" - }, - {, - "EventCode": "0x20066", - "EventName": "PM_TLB_MISS", - "BriefDescription": "TLB Miss (I + D)", - "PublicDescription": "" + "BriefDescription": "Number of I-ERAT reloads" }, {, "EventCode": "0x201E2", "EventName": "PM_MRK_LD_MISS_L1", - "BriefDescription": "Marked DL1 Demand Miss counted at exec time. Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load.", - "PublicDescription": "" - }, - {, - "EventCode": "0x200F4", - "EventName": "PM_RUN_CYC", - "BriefDescription": "Run_cycles", - "PublicDescription": "" - }, - {, - "EventCode": "0x200F8", - "EventName": "PM_EXT_INT", - "BriefDescription": "external interrupt", - "PublicDescription": "" - }, - {, - "EventCode": "0x30004", - "EventName": "PM_CMPLU_STALL_EMQ_FULL", - "BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full", - "PublicDescription": "" - }, - {, - "EventCode": "0x30020", - "EventName": "PM_PMC2_REWIND", - "BriefDescription": "PMC2 Rewind Event (did not match condition)", - "PublicDescription": "" - }, - {, - "EventCode": "0x30022", - "EventName": "PM_PMC4_SAVED", - "BriefDescription": "PMC4 Rewind Value saved (matched condition)", - "PublicDescription": "" - }, - {, - "EventCode": "0x30024", - "EventName": "PM_PMC6_OVERFLOW", - "BriefDescription": "Overflow from counter 6", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C04C", - "EventName": "PM_DATA_FROM_DL4", - "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D148", - "EventName": "PM_MRK_DATA_FROM_L2.1_MOD_CYC", - "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L2 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E042", - "EventName": "PM_DPTEG_FROM_L3_DISP_CONFLICT", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E046", - "EventName": "PM_DPTEG_FROM_L2.1_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F148", - "EventName": "PM_MRK_DPTEG_FROM_DL2L3_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F14C", - "EventName": "PM_MRK_DPTEG_FROM_DL4", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x30056", - "EventName": "PM_TM_ABORTS", - "BriefDescription": "Number of TM transactions aborted", - "PublicDescription": "" - }, - {, - "EventCode": "0x30058", - "EventName": "PM_TLBIE_FIN", - "BriefDescription": "tlbie finished", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C054", - "EventName": "PM_DERAT_MISS_16M", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M", - "PublicDescription": "" - }, - {, - "EventCode": "0x34058", - "EventName": "PM_ICT_NOSLOT_BR_MPRED_ICMISS", - "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred", - "PublicDescription": "" - }, - {, - "EventCode": "0x3405C", - "EventName": "PM_CMPLU_STALL_DPLONG", - "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle", - "PublicDescription": "" - }, - {, - "EventCode": "0x3405E", - "EventName": "PM_IFETCH_THROTTLE", - "BriefDescription": "Cycles in which Instruction fetch throttle was active.", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D152", - "EventName": "PM_MRK_DERAT_MISS_1G", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D154", - "EventName": "PM_MRK_DERAT_MISS_16M", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D058", - "EventName": "PM_VSU_DP_FSQRT_FDIV", - "BriefDescription": "vector versions of fdiv,fsqrt", - "PublicDescription": "" - }, - {, - "EventCode": "0x35150", - "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR", - "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E052", - "EventName": "PM_ICT_NOSLOT_IC_L3", - "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F05A", - "EventName": "PM_RADIX_PWC_L2_PDE_FROM_L3", - "BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L3 data cache", - "PublicDescription": "" - }, - {, - "EventCode": "0x300FC", - "EventName": "PM_DTLB_MISS", - "BriefDescription": "Data PTEG reload", - "PublicDescription": "" - }, - {, - "EventCode": "0x40004", - "EventName": "PM_FXU_FIN", - "BriefDescription": "The fixed point unit Unit finished an instruction. Instructions that finish may not necessary complete.", - "PublicDescription": "" + "BriefDescription": "Marked DL1 Demand Miss counted at exec time. Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." }, {, "EventCode": "0x40010", "EventName": "PM_PMC3_OVERFLOW", - "BriefDescription": "Overflow from counter 3", - "PublicDescription": "" + "BriefDescription": "Overflow from counter 3" }, {, - "EventCode": "0x4C012", - "EventName": "PM_CMPLU_STALL_ERAT_MISS", - "BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D01C", - "EventName": "PM_ICT_NOSLOT_DISP_HELD_SYNC", - "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D01E", - "EventName": "PM_ICT_NOSLOT_BR_MPRED", - "BriefDescription": "Ict empty for this thread due to branch mispred", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E010", - "EventName": "PM_ICT_NOSLOT_IC_L3MISS", - "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E01A", - "EventName": "PM_ICT_NOSLOT_DISP_HELD", - "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C120", - "EventName": "PM_MRK_DATA_FROM_L2_MEPF", - "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D124", - "EventName": "PM_MRK_DATA_FROM_L3.1_SHR", - "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C04C", - "EventName": "PM_DATA_FROM_DMEM", - "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a demand load", - "PublicDescription": "" + "EventCode": "0x1005A", + "EventName": "PM_CMPLU_STALL_DFLONG", + "BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Qualified by multicycle" }, {, "EventCode": "0x4D140", "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE", - "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a marked load", - "PublicDescription": "" + "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a marked load" }, {, - "EventCode": "0x4D04C", - "EventName": "PM_DFU_BUSY", - "BriefDescription": "Cycles in which all 4 Decimal Floating Point units are busy. The DFU is running at capacity", - "PublicDescription": "" + "EventCode": "0x3F14C", + "EventName": "PM_MRK_DPTEG_FROM_DL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, {, - "EventCode": "0x4D04E", - "EventName": "PM_VSU_FSQRT_FDIV", - "BriefDescription": "four flops operation (fdiv,fsqrt) Scalar Instructions only", - "PublicDescription": "" + "EventCode": "0x1E040", + "EventName": "PM_DPTEG_FROM_L2_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, {, - "EventCode": "0x4E046", - "EventName": "PM_DPTEG_FROM_L2.1_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" + "EventCode": "0x24052", + "EventName": "PM_FXU_IDLE", + "BriefDescription": "Cycles in which FXU0, FXU1, FXU2, and FXU3 are all idle" + }, + {, + "EventCode": "0x1E054", + "EventName": "PM_CMPLU_STALL", + "BriefDescription": "Nothing completed and ICT not empty" + }, + {, + "EventCode": "0x2", + "EventName": "PM_INST_CMPL", + "BriefDescription": "Number of PowerPC Instructions that completed." + }, + {, + "EventCode": "0x3D058", + "EventName": "PM_VSU_DP_FSQRT_FDIV", + "BriefDescription": "vector versions of fdiv,fsqrt" + }, + {, + "EventCode": "0x10006", + "EventName": "PM_DISP_HELD", + "BriefDescription": "Dispatch Held" + }, + {, + "EventCode": "0x3D154", + "EventName": "PM_MRK_DERAT_MISS_16M", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M" + }, + {, + "EventCode": "0x200F8", + "EventName": "PM_EXT_INT", + "BriefDescription": "external interrupt" + }, + {, + "EventCode": "0x20008", + "EventName": "PM_ICT_EMPTY_CYC", + "BriefDescription": "Cycles in which the ICT is completely empty. No itags are assigned to any thread" }, {, "EventCode": "0x4F146", - "EventName": "PM_MRK_DPTEG_FROM_L2.1_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" + "EventName": "PM_MRK_DPTEG_FROM_L21_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, {, - "EventCode": "0x4F14A", - "EventName": "PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE", - "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" + "EventCode": "0x10056", + "EventName": "PM_MEM_READ", + "BriefDescription": "Reads from Memory from this thread (includes data/inst/xlate/l1prefetch/inst prefetch). Includes L4" }, {, - "EventCode": "0x4F14C", - "EventName": "PM_MRK_DPTEG_FROM_DMEM", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" + "EventCode": "0x3C04C", + "EventName": "PM_DATA_FROM_DL4", + "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a demand load" }, {, - "EventCode": "0x40052", - "EventName": "PM_PUMP_MPRED", - "BriefDescription": "Pump misprediction. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", - "PublicDescription": "" + "EventCode": "0x4E046", + "EventName": "PM_DPTEG_FROM_L21_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, {, - "EventCode": "0x4C058", - "EventName": "PM_MEM_CO", - "BriefDescription": "Memory castouts from this thread", - "PublicDescription": "" + "EventCode": "0x2E016", + "EventName": "PM_NTC_ISSUE_HELD_ARB", + "BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)" }, {, - "EventCode": "0x4C15C", - "EventName": "PM_MRK_DERAT_MISS_16G", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G", - "PublicDescription": "" + "EventCode": "0x15156", + "EventName": "PM_SYNC_MRK_FX_DIVIDE", + "BriefDescription": "Marked fixed point divide that can cause a synchronous interrupt" }, {, - "EventCode": "0x4D050", - "EventName": "PM_VSU_NON_FLOP_CMPL", - "BriefDescription": "Non FLOP operation completed", - "PublicDescription": "" + "EventCode": "0x1C056", + "EventName": "PM_DERAT_MISS_4K", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K" }, {, - "EventCode": "0x4D052", - "EventName": "PM_2FLOP_CMPL", - "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg ", - "PublicDescription": "" + "EventCode": "0x2F142", + "EventName": "PM_MRK_DPTEG_FROM_L3_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, {, - "EventCode": "0x45058", - "EventName": "PM_IC_MISS_CMPL", - "BriefDescription": "Non-speculative icache miss, counted at completion", - "PublicDescription": "" + "EventCode": "0x10024", + "EventName": "PM_PMC5_OVERFLOW", + "BriefDescription": "Overflow from counter 5" }, {, - "EventCode": "0x40062", - "EventName": "PM_DUMMY1_REMOVE_ME", - "BriefDescription": "Space holder for L2_PC_PM_MK_LDST_SCOPE_PRED_STATUS", - "PublicDescription": "" - }, - {, - "EventCode": "0x40064", - "EventName": "PM_DUMMY2_REMOVE_ME", - "BriefDescription": "Space holder for LS_PC_RELOAD_RA", - "PublicDescription": "" + "EventCode": "0x2C018", + "EventName": "PM_CMPLU_STALL_DMISS_L21_L31", + "BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)" }, {, "EventCode": "0x4006A", "EventName": "PM_IERAT_RELOAD_16M", - "BriefDescription": "IERAT Reloaded (Miss) for a 16M page", - "PublicDescription": "" + "BriefDescription": "IERAT Reloaded (Miss) for a 16M page" }, {, - "EventCode": "0x401EC", - "EventName": "PM_THRESH_EXC_2048", - "BriefDescription": "Threshold counter exceeded a value of 2048", - "PublicDescription": "" + "EventCode": "0x4E010", + "EventName": "PM_ICT_NOSLOT_IC_L3MISS", + "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache" + }, + {, + "EventCode": "0x4D01C", + "EventName": "PM_ICT_NOSLOT_DISP_HELD_SYNC", + "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch" + }, + {, + "EventCode": "0x2D01A", + "EventName": "PM_ICT_NOSLOT_IC_MISS", + "BriefDescription": "Ict empty for this thread due to Icache Miss" + }, + {, + "EventCode": "0x3D152", + "EventName": "PM_MRK_DERAT_MISS_1G", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation" + }, + {, + "EventCode": "0x4F14A", + "EventName": "PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x30058", + "EventName": "PM_TLBIE_FIN", + "BriefDescription": "tlbie finished" + }, + {, + "EventCode": "0x100F8", + "EventName": "PM_ICT_NOSLOT_CYC", + "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread" + }, + {, + "EventCode": "0x3E042", + "EventName": "PM_DPTEG_FROM_L3_DISP_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x1F140", + "EventName": "PM_MRK_DPTEG_FROM_L2_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x2C05A", + "EventName": "PM_DERAT_MISS_1G", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation" + }, + {, + "EventCode": "0x1F058", + "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L2", + "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L2 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation" + }, + {, + "EventCode": "0x1D14A", + "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load" + }, + {, + "EventCode": "0x10050", + "EventName": "PM_CHIP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)" + }, + {, + "EventCode": "0x45058", + "EventName": "PM_IC_MISS_CMPL", + "BriefDescription": "Non-speculative icache miss, counted at completion" + }, + {, + "EventCode": "0x2D150", + "EventName": "PM_MRK_DERAT_MISS_4K", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 4K" + }, + {, + "EventCode": "0x34058", + "EventName": "PM_ICT_NOSLOT_BR_MPRED_ICMISS", + "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred" + }, + {, + "EventCode": "0x10022", + "EventName": "PM_PMC2_SAVED", + "BriefDescription": "PMC2 Rewind Value saved" + }, + {, + "EventCode": "0x2000A", + "EventName": "PM_HV_CYC", + "BriefDescription": "Cycles in which msr_hv is high. Note that this event does not take msr_pr into consideration" + }, + {, + "EventCode": "0x1F144", + "EventName": "PM_MRK_DPTEG_FROM_L3_NO_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x300FC", + "EventName": "PM_DTLB_MISS", + "BriefDescription": "Data PTEG reload" + }, + {, + "EventCode": "0x2D152", + "EventName": "PM_MRK_DERAT_MISS_2M", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation" + }, + {, + "EventCode": "0x2C046", + "EventName": "PM_DATA_FROM_RL2L3_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load" + }, + {, + "EventCode": "0x20052", + "EventName": "PM_GRP_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)" + }, + {, + "EventCode": "0x3F05A", + "EventName": "PM_RADIX_PWC_L2_PDE_FROM_L3", + "BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L3 data cache" + }, + {, + "EventCode": "0x1E04A", + "EventName": "PM_DPTEG_FROM_RL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x10064", + "EventName": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN", + "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch" + }, + {, + "EventCode": "0x2E046", + "EventName": "PM_DPTEG_FROM_RL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4F14C", + "EventName": "PM_MRK_DPTEG_FROM_DMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x2E042", + "EventName": "PM_DPTEG_FROM_L3_MEPF", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x2D012", + "EventName": "PM_CMPLU_STALL_DFU", + "BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Not qualified by multicycle" + }, + {, + "EventCode": "0x4C04C", + "EventName": "PM_DATA_FROM_DMEM", + "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a demand load" + }, + {, + "EventCode": "0x30022", + "EventName": "PM_PMC4_SAVED", + "BriefDescription": "PMC4 Rewind Value saved (matched condition)" + }, + {, + "EventCode": "0x200F4", + "EventName": "PM_RUN_CYC", + "BriefDescription": "Run_cycles" }, {, "EventCode": "0x400F2", "EventName": "PM_1PLUS_PPC_DISP", - "BriefDescription": "Cycles at least one Instr Dispatched", - "PublicDescription": "" + "BriefDescription": "Cycles at least one Instr Dispatched" + }, + {, + "EventCode": "0x3D148", + "EventName": "PM_MRK_DATA_FROM_L21_MOD_CYC", + "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L2 on the same chip due to a marked load" + }, + {, + "EventCode": "0x2F146", + "EventName": "PM_MRK_DPTEG_FROM_RL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4E01A", + "EventName": "PM_ICT_NOSLOT_DISP_HELD", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason" + }, + {, + "EventCode": "0x401EC", + "EventName": "PM_THRESH_EXC_2048", + "BriefDescription": "Threshold counter exceeded a value of 2048" + }, + {, + "EventCode": "0x35150", + "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load" + }, + {, + "EventCode": "0x3E052", + "EventName": "PM_ICT_NOSLOT_IC_L3", + "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3" + }, + {, + "EventCode": "0x2405A", + "EventName": "PM_NTC_FIN", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. This event is used to account for cycles in which work is being completed in the CPI stack" + }, + {, + "EventCode": "0x40052", + "EventName": "PM_PUMP_MPRED", + "BriefDescription": "Pump misprediction. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)" + }, + {, + "EventCode": "0x30056", + "EventName": "PM_TM_ABORTS", + "BriefDescription": "Number of TM transactions aborted" + }, + {, + "EventCode": "0x2404C", + "EventName": "PM_INST_FROM_MEMORY", + "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x1C05A", + "EventName": "PM_DERAT_MISS_2M", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation" + }, + {, + "EventCode": "0x30024", + "EventName": "PM_PMC6_OVERFLOW", + "BriefDescription": "Overflow from counter 6" + }, + {, + "EventCode": "0x10068", + "EventName": "PM_BRU_FIN", + "BriefDescription": "Branch Instruction Finished" + }, + {, + "EventCode": "0x30020", + "EventName": "PM_PMC2_REWIND", + "BriefDescription": "PMC2 Rewind Event (did not match condition)" + }, + {, + "EventCode": "0x40064", + "EventName": "PM_DUMMY2_REMOVE_ME", + "BriefDescription": "Space holder for LS_PC_RELOAD_RA" + }, + {, + "EventCode": "0x3F148", + "EventName": "PM_MRK_DPTEG_FROM_DL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4D01E", + "EventName": "PM_ICT_NOSLOT_BR_MPRED", + "BriefDescription": "Ict empty for this thread due to branch mispred" + }, + {, + "EventCode": "0x3405E", + "EventName": "PM_IFETCH_THROTTLE", + "BriefDescription": "Cycles in which Instruction fetch throttle was active." + }, + {, + "EventCode": "0x1F148", + "EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x3E046", + "EventName": "PM_DPTEG_FROM_L21_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x2F144", + "EventName": "PM_MRK_DPTEG_FROM_L31_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4C15C", + "EventName": "PM_MRK_DERAT_MISS_16G", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G" + }, + {, + "EventCode": "0x14052", + "EventName": "PM_INST_GRP_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch" + }, + {, + "EventCode": "0x10016", + "EventName": "PM_DSLB_MISS", + "BriefDescription": "Data SLB Miss - Total of all segment sizes" + }, + {, + "EventCode": "0xD0A8", + "EventName": "PM_DSLB_MISS", + "BriefDescription": "Data SLB Miss - Total of all segment sizes" + }, + {, + "EventCode": "0x4C058", + "EventName": "PM_MEM_CO", + "BriefDescription": "Memory castouts from this thread" + }, + {, + "EventCode": "0x40004", + "EventName": "PM_FXU_FIN", + "BriefDescription": "The fixed point unit Unit finished an instruction. Instructions that finish may not necessary complete." + }, + {, + "EventCode": "0x2C054", + "EventName": "PM_DERAT_MISS_64K", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K" + }, + {, + "EventCode": "0x10018", + "EventName": "PM_IC_DEMAND_CYC", + "BriefDescription": "Icache miss demand cycles" + }, + {, + "EventCode": "0x3C054", + "EventName": "PM_DERAT_MISS_16M", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M" + }, + {, + "EventCode": "0x2D14E", + "EventName": "PM_MRK_DATA_FROM_L21_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load" + }, + {, + "EventCode": "0x3405C", + "EventName": "PM_CMPLU_STALL_DPLONG", + "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle" + }, + {, + "EventCode": "0x4D052", + "EventName": "PM_2FLOP_CMPL", + "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg " + }, + {, + "EventCode": "0x1F142", + "EventName": "PM_MRK_DPTEG_FROM_L2", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x40062", + "EventName": "PM_DUMMY1_REMOVE_ME", + "BriefDescription": "Space holder for L2_PC_PM_MK_LDST_SCOPE_PRED_STATUS" + }, + {, + "EventCode": "0x4C012", + "EventName": "PM_CMPLU_STALL_ERAT_MISS", + "BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss" + }, + {, + "EventCode": "0x4D050", + "EventName": "PM_VSU_NON_FLOP_CMPL", + "BriefDescription": "Non FLOP operation completed" + }, + {, + "EventCode": "0x2E012", + "EventName": "PM_TM_TX_PASS_RUN_CYC", + "BriefDescription": "cycles spent in successful transactions" + }, + {, + "EventCode": "0x4D04E", + "EventName": "PM_VSU_FSQRT_FDIV", + "BriefDescription": "four flops operation (fdiv,fsqrt) Scalar Instructions only" + }, + {, + "EventCode": "0x4C120", + "EventName": "PM_MRK_DATA_FROM_L2_MEPF", + "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load" + }, + {, + "EventCode": "0x10062", + "EventName": "PM_LD_L3MISS_PEND_CYC", + "BriefDescription": "Cycles L3 miss was pending for this thread" + }, + {, + "EventCode": "0x2F14C", + "EventName": "PM_MRK_DPTEG_FROM_MEMORY", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x14050", + "EventName": "PM_INST_CHIP_PUMP_CPRED", + "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for an instruction fetch" + }, + {, + "EventCode": "0x2000E", + "EventName": "PM_FXU_BUSY", + "BriefDescription": "Cycles in which all 4 FXUs are busy. The FXU is running at capacity" + }, + {, + "EventCode": "0x20066", + "EventName": "PM_TLB_MISS", + "BriefDescription": "TLB Miss (I + D)" + }, + {, + "EventCode": "0x10054", + "EventName": "PM_PUMP_CPRED", + "BriefDescription": "Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)" + }, + {, + "EventCode": "0x4D124", + "EventName": "PM_MRK_DATA_FROM_L31_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a marked load" }, {, "EventCode": "0x400F8", "EventName": "PM_FLUSH", - "BriefDescription": "Flush (any type)", - "PublicDescription": "" + "BriefDescription": "Flush (any type)" + }, + {, + "EventCode": "0x30004", + "EventName": "PM_CMPLU_STALL_EMQ_FULL", + "BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full" + }, + {, + "EventCode": "0x1D154", + "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC", + "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json index 32ce71135f77..a2c95a99e168 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json @@ -1,146 +1,127 @@ [ {, - "EventCode": "0x0", - "EventName": "PM_SUSPENDED", - "BriefDescription": "Counter OFF", - "PublicDescription": "" + "EventCode": "0x20036", + "EventName": "PM_BR_2PATH", + "BriefDescription": "Branches that are not strongly biased" }, {, - "EventCode": "0x10026", - "EventName": "PM_TABLEWALK_CYC", - "BriefDescription": "Cycles when an instruction tablewalk is active", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E04C", - "EventName": "PM_DPTEG_FROM_LL4", - "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F14E", - "EventName": "PM_MRK_DPTEG_FROM_L2MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x10060", - "EventName": "PM_TM_TRANS_RUN_CYC", - "BriefDescription": "run cycles in transactional state", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C012", - "EventName": "PM_CMPLU_STALL_DCACHE_MISS", - "BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E04C", - "EventName": "PM_DPTEG_FROM_MEMORY", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x2C056", - "EventName": "PM_DTLB_MISS_4K", - "BriefDescription": "Data TLB Miss page size 4k", - "PublicDescription": "" - }, - {, - "EventCode": "0x3000C", - "EventName": "PM_FREQ_DOWN", - "BriefDescription": "Power Management: Below Threshold B", - "PublicDescription": "" - }, - {, - "EventCode": "0x3D142", - "EventName": "PM_MRK_DATA_FROM_LMEM", - "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x3F142", - "EventName": "PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x301E8", - "EventName": "PM_THRESH_EXC_64", - "BriefDescription": "Threshold counter exceeded a value of 64", - "PublicDescription": "" - }, - {, - "EventCode": "0x40118", - "EventName": "PM_MRK_DCACHE_RELOAD_INTV", - "BriefDescription": "Combined Intervention event", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C01E", - "EventName": "PM_CMPLU_STALL_CRYPTO", - "BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D018", - "EventName": "PM_CMPLU_STALL_BRU", - "BriefDescription": "Completion stall due to a Branch Unit", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D128", - "EventName": "PM_MRK_DATA_FROM_LMEM_CYC", - "BriefDescription": "Duration in cycles to reload from the local chip's Memory due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E04E", - "EventName": "PM_DPTEG_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x4F142", - "EventName": "PM_MRK_DPTEG_FROM_L3", - "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x4F148", - "EventName": "PM_MRK_DPTEG_FROM_DL2L3_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x40050", - "EventName": "PM_SYS_PUMP_MPRED_RTY", - "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)", - "PublicDescription": "" + "EventCode": "0x40036", + "EventName": "PM_BR_2PATH", + "BriefDescription": "Branches that are not strongly biased" }, {, "EventCode": "0x40056", "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH", - "BriefDescription": "Local memory above threshold for LSU medium", - "PublicDescription": "" + "BriefDescription": "Local memory above threshold for LSU medium" }, {, - "EventCode": "0x4D054", - "EventName": "PM_8FLOP_CMPL", - "BriefDescription": "8 FLOP instruction completed", - "PublicDescription": "" + "EventCode": "0x2C056", + "EventName": "PM_DTLB_MISS_4K", + "BriefDescription": "Data TLB Miss page size 4k" }, {, - "EventCode": "0x45050", - "EventName": "PM_1FLOP_CMPL", - "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed", - "PublicDescription": "" + "EventCode": "0x40118", + "EventName": "PM_MRK_DCACHE_RELOAD_INTV", + "BriefDescription": "Combined Intervention event" + }, + {, + "EventCode": "0x4F148", + "EventName": "PM_MRK_DPTEG_FROM_DL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x301E8", + "EventName": "PM_THRESH_EXC_64", + "BriefDescription": "Threshold counter exceeded a value of 64" + }, + {, + "EventCode": "0x4E04E", + "EventName": "PM_DPTEG_FROM_L3MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x40050", + "EventName": "PM_SYS_PUMP_MPRED_RTY", + "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)" + }, + {, + "EventCode": "0x1F14E", + "EventName": "PM_MRK_DPTEG_FROM_L2MISS", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4D018", + "EventName": "PM_CMPLU_STALL_BRU", + "BriefDescription": "Completion stall due to a Branch Unit" }, {, "EventCode": "0x45052", "EventName": "PM_4FLOP_CMPL", - "BriefDescription": "4 FLOP instruction completed", - "PublicDescription": "" + "BriefDescription": "4 FLOP instruction completed" + }, + {, + "EventCode": "0x3D142", + "EventName": "PM_MRK_DATA_FROM_LMEM", + "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a marked load" + }, + {, + "EventCode": "0x4C01E", + "EventName": "PM_CMPLU_STALL_CRYPTO", + "BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish" + }, + {, + "EventCode": "0x3000C", + "EventName": "PM_FREQ_DOWN", + "BriefDescription": "Power Management: Below Threshold B" + }, + {, + "EventCode": "0x4D128", + "EventName": "PM_MRK_DATA_FROM_LMEM_CYC", + "BriefDescription": "Duration in cycles to reload from the local chip's Memory due to a marked load" + }, + {, + "EventCode": "0x4D054", + "EventName": "PM_8FLOP_CMPL", + "BriefDescription": "8 FLOP instruction completed" + }, + {, + "EventCode": "0x10026", + "EventName": "PM_TABLEWALK_CYC", + "BriefDescription": "Cycles when an instruction tablewalk is active" + }, + {, + "EventCode": "0x2C012", + "EventName": "PM_CMPLU_STALL_DCACHE_MISS", + "BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest" + }, + {, + "EventCode": "0x2E04C", + "EventName": "PM_DPTEG_FROM_MEMORY", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x3F142", + "EventName": "PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x4F142", + "EventName": "PM_MRK_DPTEG_FROM_L3", + "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x10060", + "EventName": "PM_TM_TRANS_RUN_CYC", + "BriefDescription": "run cycles in transactional state" + }, + {, + "EventCode": "0x1E04C", + "EventName": "PM_DPTEG_FROM_LL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x45050", + "EventName": "PM_1FLOP_CMPL", + "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/translation.json b/tools/perf/pmu-events/arch/powerpc/power9/translation.json index d75859836f14..8c0f12024afa 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/translation.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/translation.json @@ -1,272 +1,232 @@ [ {, - "EventCode": "0x10028", - "EventName": "PM_STALL_END_ICT_EMPTY", - "BriefDescription": "The number a times the core transitioned from a stall to ICT-empty for this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x1C04E", - "EventName": "PM_DATA_FROM_L2MISS_MOD", - "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x14044", - "EventName": "PM_INST_FROM_L3_NO_CONFLICT", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x1404E", - "EventName": "PM_INST_FROM_L2MISS", - "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x1D142", - "EventName": "PM_MRK_DATA_FROM_L3.1_ECO_SHR_CYC", - "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's ECO L3 on the same chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x15048", - "EventName": "PM_IPTEG_FROM_ON_CHIP_CACHE", - "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x1504A", - "EventName": "PM_IPTEG_FROM_RL2L3_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x1E058", - "EventName": "PM_STCX_FAIL", - "BriefDescription": "stcx failed", - "PublicDescription": "" - }, - {, - "EventCode": "0x1F15E", - "EventName": "PM_MRK_PROBE_NOP_CMPL", - "BriefDescription": "Marked probeNops completed", - "PublicDescription": "" - }, - {, - "EventCode": "0x20112", - "EventName": "PM_MRK_NTF_FIN", - "BriefDescription": "Marked next to finish instruction finished", - "PublicDescription": "" - }, - {, - "EventCode": "0x20016", - "EventName": "PM_ST_FIN", - "BriefDescription": "Store finish count. Includes speculative activity", - "PublicDescription": "" - }, - {, - "EventCode": "0x20018", - "EventName": "PM_ST_FWD", - "BriefDescription": "Store forwards that finished", - "PublicDescription": "" - }, - {, - "EventCode": "0x2011C", - "EventName": "PM_MRK_NTC_CYC", - "BriefDescription": "Cycles during which the marked instruction is next to complete (completion is held up because the marked instruction hasn't completed yet)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E018", - "EventName": "PM_CMPLU_STALL_VFXLONG", - "BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2E01C", - "EventName": "PM_CMPLU_STALL_TLBIE", - "BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x2003E", - "EventName": "PM_LSU_LMQ_SRQ_EMPTY_CYC", - "BriefDescription": "Cycles in which the LSU is empty for all threads (lmq and srq are completely empty)", - "PublicDescription": "" - }, - {, - "EventCode": "0x24042", - "EventName": "PM_INST_FROM_L3_MEPF", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x2D14A", - "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC", - "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load", - "PublicDescription": "" - }, - {, - "EventCode": "0x25046", - "EventName": "PM_IPTEG_FROM_RL2L3_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x2504A", - "EventName": "PM_IPTEG_FROM_RL4", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x2504C", - "EventName": "PM_IPTEG_FROM_MEMORY", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x201E6", - "EventName": "PM_THRESH_EXC_32", - "BriefDescription": "Threshold counter exceeded a value of 32", - "PublicDescription": "" - }, - {, - "EventCode": "0x200F0", - "EventName": "PM_ST_CMPL", - "BriefDescription": "Stores completed from S2Q (2nd-level store queue).", - "PublicDescription": "" - }, - {, - "EventCode": "0x200FE", - "EventName": "PM_DATA_FROM_L2MISS", - "BriefDescription": "Demand LD - L2 Miss (not L2 hit)", - "PublicDescription": "" + "EventCode": "0x1E", + "EventName": "PM_CYC", + "BriefDescription": "Processor cycles" }, {, "EventCode": "0x30010", "EventName": "PM_PMC2_OVERFLOW", - "BriefDescription": "Overflow from counter 2", - "PublicDescription": "" + "BriefDescription": "Overflow from counter 2" }, {, "EventCode": "0x3C046", - "EventName": "PM_DATA_FROM_L2.1_SHR", - "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x34042", - "EventName": "PM_INST_FROM_L3_DISP_CONFLICT", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x34046", - "EventName": "PM_INST_FROM_L2.1_SHR", - "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x3504A", - "EventName": "PM_IPTEG_FROM_RMEM", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E048", - "EventName": "PM_DPTEG_FROM_DL2L3_SHR", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x3E04C", - "EventName": "PM_DPTEG_FROM_DL4", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C05A", - "EventName": "PM_CMPLU_STALL_VDPLONG", - "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle", - "PublicDescription": "" - }, - {, - "EventCode": "0x3C05C", - "EventName": "PM_CMPLU_STALL_VFXU", - "BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes", - "PublicDescription": "" - }, - {, - "EventCode": "0x30066", - "EventName": "PM_LSU_FIN", - "BriefDescription": "LSU Finished a PPC instruction (up to 4 per cycle)", - "PublicDescription": "" - }, - {, - "EventCode": "0x300F0", - "EventName": "PM_ST_MISS_L1", - "BriefDescription": "Store Missed L1", - "PublicDescription": "" - }, - {, - "EventCode": "0x4D010", - "EventName": "PM_PMC1_SAVED", - "BriefDescription": "PMC1 Rewind Value saved", - "PublicDescription": "" - }, - {, - "EventCode": "0x40132", - "EventName": "PM_MRK_LSU_FIN", - "BriefDescription": "lsu marked instr PPC finish", - "PublicDescription": "" - }, - {, - "EventCode": "0x4C046", - "EventName": "PM_DATA_FROM_L2.1_MOD", - "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a demand load", - "PublicDescription": "" - }, - {, - "EventCode": "0x44042", - "EventName": "PM_INST_FROM_L3", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)", - "PublicDescription": "" - }, - {, - "EventCode": "0x4504A", - "EventName": "PM_IPTEG_FROM_OFF_CHIP_CACHE", - "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a instruction side request", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E048", - "EventName": "PM_DPTEG_FROM_DL2L3_MOD", - "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x4E04C", - "EventName": "PM_DPTEG_FROM_DMEM", - "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included", - "PublicDescription": "" - }, - {, - "EventCode": "0x4405C", - "EventName": "PM_CMPLU_STALL_VDP", - "BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by vector", - "PublicDescription": "" + "EventName": "PM_DATA_FROM_L21_SHR", + "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a demand load" }, {, "EventCode": "0x4D05C", "EventName": "PM_DP_QP_FLOP_CMPL", - "BriefDescription": "Double-Precion or Quad-Precision instruction completed", - "PublicDescription": "" + "BriefDescription": "Double-Precion or Quad-Precision instruction completed" + }, + {, + "EventCode": "0x4E04C", + "EventName": "PM_DPTEG_FROM_DMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x20016", + "EventName": "PM_ST_FIN", + "BriefDescription": "Store finish count. Includes speculative activity" + }, + {, + "EventCode": "0x44042", + "EventName": "PM_INST_FROM_L3", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x1504A", + "EventName": "PM_IPTEG_FROM_RL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request" + }, + {, + "EventCode": "0x40132", + "EventName": "PM_MRK_LSU_FIN", + "BriefDescription": "lsu marked instr PPC finish" + }, + {, + "EventCode": "0x3C05C", + "EventName": "PM_CMPLU_STALL_VFXU", + "BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes" + }, + {, + "EventCode": "0x30066", + "EventName": "PM_LSU_FIN", + "BriefDescription": "LSU Finished a PPC instruction (up to 4 per cycle)" + }, + {, + "EventCode": "0x2011C", + "EventName": "PM_MRK_NTC_CYC", + "BriefDescription": "Cycles during which the marked instruction is next to complete (completion is held up because the marked instruction hasn't completed yet)" + }, + {, + "EventCode": "0x3E048", + "EventName": "PM_DPTEG_FROM_DL2L3_SHR", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x2E018", + "EventName": "PM_CMPLU_STALL_VFXLONG", + "BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)" + }, + {, + "EventCode": "0x1C04E", + "EventName": "PM_DATA_FROM_L2MISS_MOD", + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load" + }, + {, + "EventCode": "0x15048", + "EventName": "PM_IPTEG_FROM_ON_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a instruction side request" + }, + {, + "EventCode": "0x34046", + "EventName": "PM_INST_FROM_L21_SHR", + "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x1E058", + "EventName": "PM_STCX_FAIL", + "BriefDescription": "stcx failed" + }, + {, + "EventCode": "0x20112", + "EventName": "PM_MRK_NTF_FIN", + "BriefDescription": "Marked next to finish instruction finished" + }, + {, + "EventCode": "0x300F0", + "EventName": "PM_ST_MISS_L1", + "BriefDescription": "Store Missed L1" + }, + {, + "EventCode": "0x4C046", + "EventName": "PM_DATA_FROM_L21_MOD", + "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a demand load" + }, + {, + "EventCode": "0x2504A", + "EventName": "PM_IPTEG_FROM_RL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a instruction side request" + }, + {, + "EventCode": "0x2003E", + "EventName": "PM_LSU_LMQ_SRQ_EMPTY_CYC", + "BriefDescription": "Cycles in which the LSU is empty for all threads (lmq and srq are completely empty)" + }, + {, + "EventCode": "0x201E6", + "EventName": "PM_THRESH_EXC_32", + "BriefDescription": "Threshold counter exceeded a value of 32" + }, + {, + "EventCode": "0x4405C", + "EventName": "PM_CMPLU_STALL_VDP", + "BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by vector" + }, + {, + "EventCode": "0x4D010", + "EventName": "PM_PMC1_SAVED", + "BriefDescription": "PMC1 Rewind Value saved" + }, + {, + "EventCode": "0x200FE", + "EventName": "PM_DATA_FROM_L2MISS", + "BriefDescription": "Demand LD - L2 Miss (not L2 hit)" + }, + {, + "EventCode": "0x2D14A", + "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC", + "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load" + }, + {, + "EventCode": "0x10028", + "EventName": "PM_STALL_END_ICT_EMPTY", + "BriefDescription": "The number a times the core transitioned from a stall to ICT-empty for this thread" + }, + {, + "EventCode": "0x2504C", + "EventName": "PM_IPTEG_FROM_MEMORY", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a instruction side request" + }, + {, + "EventCode": "0x4504A", + "EventName": "PM_IPTEG_FROM_OFF_CHIP_CACHE", + "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a instruction side request" + }, + {, + "EventCode": "0x1404E", + "EventName": "PM_INST_FROM_L2MISS", + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x34042", + "EventName": "PM_INST_FROM_L3_DISP_CONFLICT", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x4E048", + "EventName": "PM_DPTEG_FROM_DL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x200F0", + "EventName": "PM_ST_CMPL", + "BriefDescription": "Stores completed from S2Q (2nd-level store queue)." }, {, "EventCode": "0x4E05C", "EventName": "PM_LSU_REJECT_LHS", - "BriefDescription": "LSU Reject due to LHS (up to 4 per cycle)", - "PublicDescription": "" + "BriefDescription": "LSU Reject due to LHS (up to 4 per cycle)" + }, + {, + "EventCode": "0x14044", + "EventName": "PM_INST_FROM_L3_NO_CONFLICT", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x3E04C", + "EventName": "PM_DPTEG_FROM_DL4", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" + }, + {, + "EventCode": "0x1F15E", + "EventName": "PM_MRK_PROBE_NOP_CMPL", + "BriefDescription": "Marked probeNops completed" + }, + {, + "EventCode": "0x20018", + "EventName": "PM_ST_FWD", + "BriefDescription": "Store forwards that finished" + }, + {, + "EventCode": "0x1D142", + "EventName": "PM_MRK_DATA_FROM_L31_ECO_SHR_CYC", + "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's ECO L3 on the same chip due to a marked load" + }, + {, + "EventCode": "0x24042", + "EventName": "PM_INST_FROM_L3_MEPF", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to an instruction fetch (not prefetch)" + }, + {, + "EventCode": "0x25046", + "EventName": "PM_IPTEG_FROM_RL2L3_MOD", + "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request" + }, + {, + "EventCode": "0x3504A", + "EventName": "PM_IPTEG_FROM_RMEM", + "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a instruction side request" + }, + {, + "EventCode": "0x3C05A", + "EventName": "PM_CMPLU_STALL_VDPLONG", + "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle" + }, + {, + "EventCode": "0x2E01C", + "EventName": "PM_CMPLU_STALL_TLBIE", + "BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2" } -] +] \ No newline at end of file From 5e97665f91d343b5bcf1f92249e45e18987ffd00 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 24 Jul 2017 16:40:03 -0700 Subject: [PATCH 171/253] perf stat: Fix saved values rbtree lookup The stat shadow saved values rbtree is indexed by a pointer. Fix the comparison function: - We cannot return a pointer delta as an int because that loses bits on 64bit. - Doing pointer arithmetic on the struct pointer only works if the objects are spaced by the multiple of the object size, which is not guaranteed for individual malloc'ed object Replace it with a proper comparison. This fixes various problems with values not being found. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170724234015.5165-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 719d6cb86952..a04cf56d3517 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -70,7 +70,11 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) return a->ctx - b->ctx; if (a->cpu != b->cpu) return a->cpu - b->cpu; - return a->evsel - b->evsel; + if (a->evsel == b->evsel) + return 0; + if ((char *)a->evsel < (char *)b->evsel) + return -1; + return +1; } static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, From c295036b6a2cf968ceac76d16d5fac43e22369f7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 24 Jul 2017 16:40:05 -0700 Subject: [PATCH 172/253] perf tools: Add missing newline to expr parser error messages Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170724234015.5165-6-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.y | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 954556bea36e..953e65ba2cc7 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -62,7 +62,7 @@ all_expr: expr { *final_val = $1; } expr: NUMBER | ID { if (lookup_id(ctx, $1, &$$) < 0) { - pr_debug("%s not found", $1); + pr_debug("%s not found\n", $1); YYABORT; } } From 28765bf263fcfef2f1508d603a7171b8263a3d68 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Aug 2017 11:16:40 -0300 Subject: [PATCH 173/253] perf test: Make 'list' subcommand match main 'perf test' numbering/matching Before: # perf test Synth 39: Synthesize thread map : Ok 41: Synthesize cpu map : Ok 42: Synthesize stat config : Ok 43: Synthesize stat : Ok 44: Synthesize stat round : Ok 45: Synthesize attr update : Ok # # perf test list Synth 1: Synthesize thread map 2: Synthesize cpu map 3: Synthesize stat config 4: Synthesize stat 5: Synthesize stat round 6: Synthesize attr update # After: # perf test Synth 39: Synthesize thread map : Ok 41: Synthesize cpu map : Ok 42: Synthesize stat config : Ok 43: Synthesize stat : Ok 44: Synthesize stat round : Ok 45: Synthesize attr update : Ok # # perf test list Synth 39: Synthesize thread map 41: Synthesize cpu map 42: Synthesize stat config 43: Synthesize stat 44: Synthesize stat round 45: Synthesize attr update # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Matt Fleming Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-pjhuhkphs7o3tkbqrukfv6bz@git.kernel.org Fixes: e8210cefb7e1 ("perf tests: Introduce iterator function for tests") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 3ccfd58a8c3c..436e22993909 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -465,10 +465,12 @@ static int perf_test__list(int argc, const char **argv) int i = 0; for_each_test(j, t) { + ++i; + if (argc > 1 && !strstr(t->desc, argv[1])) continue; - pr_info("%2d: %s\n", ++i, t->desc); + pr_info("%2d: %s\n", i, t->desc); } return 0; From 81f17c90f14122123cc52d1609f567834e56b122 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 3 Aug 2017 15:16:31 -0300 Subject: [PATCH 174/253] perf test: Add 'struct test *' to the test functions This way we'll be able to pass more test specific parameters without having to change this function signature. Will be used by the upcoming 'shell tests', shell scripts that will call perf tools and check if they work as expected, comparing its effects on the system (think 'perf probe foo') the output produced, etc. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wq250w7j1opbzyiynozuajbl@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 11 +- tools/perf/arch/x86/tests/insn-x86.c | 2 +- tools/perf/arch/x86/tests/intel-cqm.c | 2 +- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/arch/x86/tests/rdpmc.c | 2 +- tools/perf/tests/attr.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bitmap.c | 2 +- tools/perf/tests/bp_signal.c | 2 +- tools/perf/tests/bp_signal_overflow.c | 2 +- tools/perf/tests/bpf.c | 4 +- tools/perf/tests/builtin-test.c | 4 +- tools/perf/tests/clang.c | 4 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/cpumap.c | 4 +- tools/perf/tests/dso-data.c | 6 +- tools/perf/tests/dwarf-unwind.c | 2 +- tools/perf/tests/event-times.c | 2 +- tools/perf/tests/event_update.c | 2 +- tools/perf/tests/evsel-roundtrip-name.c | 2 +- tools/perf/tests/evsel-tp-sched.c | 2 +- tools/perf/tests/expr.c | 2 +- tools/perf/tests/fdarray.c | 4 +- tools/perf/tests/hists_cumulate.c | 2 +- tools/perf/tests/hists_filter.c | 2 +- tools/perf/tests/hists_link.c | 2 +- tools/perf/tests/hists_output.c | 2 +- tools/perf/tests/is_printable_array.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/kmod-path.c | 2 +- tools/perf/tests/llvm.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/mmap-thread-lookup.c | 2 +- tools/perf/tests/openat-syscall-all-cpus.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/openat-syscall.c | 2 +- tools/perf/tests/parse-events.c | 2 +- tools/perf/tests/parse-no-sample-id-all.c | 2 +- tools/perf/tests/perf-hooks.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/pmu.c | 2 +- tools/perf/tests/python-use.c | 2 +- tools/perf/tests/sample-parsing.c | 2 +- tools/perf/tests/sdt.c | 4 +- tools/perf/tests/stat.c | 6 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/tests/tests.h | 112 +++++++++---------- tools/perf/tests/thread-map.c | 6 +- tools/perf/tests/thread-mg-share.c | 2 +- tools/perf/tests/topology.c | 2 +- tools/perf/tests/unit_number__scnprintf.c | 2 +- tools/perf/tests/vmlinux-kallsyms.c | 2 +- 54 files changed, 127 insertions(+), 124 deletions(-) diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index b48de2f5813c..4e0b806a7a0f 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -1,11 +1,14 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H +#include +struct test; + /* Tests */ -int test__rdpmc(int subtest); -int test__perf_time_to_tsc(int subtest); -int test__insn_x86(int subtest); -int test__intel_cqm_count_nmi_context(int subtest); +int test__rdpmc(struct test *test __maybe_unused, int subtest); +int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); +int test__insn_x86(struct test *test __maybe_unused, int subtest); +int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c index 08d9b2bc185c..b3860586a0c2 100644 --- a/tools/perf/arch/x86/tests/insn-x86.c +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -171,7 +171,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64) * verbose (-v) option to see all the instructions and whether or not they * decoded successfuly. */ -int test__insn_x86(int subtest __maybe_unused) +int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret = 0; diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index f9713a71d77e..57f86b6e7d6f 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -36,7 +36,7 @@ static pid_t spawn(void) * the last read counter value to avoid triggering a WARN_ON_ONCE() in * smp_call_function_many() caused by sending IPIs from NMI context. */ -int test__intel_cqm_count_nmi_context(int subtest __maybe_unused) +int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_evlist *evlist = NULL; struct perf_evsel *evsel = NULL; diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index e3ae9cff2b67..5dd7efb192ce 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -37,7 +37,7 @@ * %0 is returned, otherwise %-1 is returned. If TSC conversion is not * supported then then the test passes but " (not supported)" is printed. */ -int test__perf_time_to_tsc(int subtest __maybe_unused) +int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused) { struct record_opts opts = { .mmap_pages = UINT_MAX, diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 500cf96db979..17fec30a0b31 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -154,7 +154,7 @@ static int __test__rdpmc(void) return 0; } -int test__rdpmc(int subtest __maybe_unused) +int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused) { int status = 0; int wret = 0; diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 84c0eb598a67..c9aafed7da15 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -169,7 +169,7 @@ static int run_dir(const char *d, const char *perf) return system(cmd); } -int test__attr(int subtest __maybe_unused) +int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused) { struct stat st; char path_perf[PATH_MAX]; diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 50f6d7afee58..d233ad336463 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -75,7 +75,7 @@ static int do_test(struct perf_evlist *evlist, int mmap_pages, } -int test__backward_ring_buffer(int subtest __maybe_unused) +int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0; char pid[16], sbuf[STRERR_BUFSIZE]; diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 9abe6c13090f..0d7c06584905 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -40,7 +40,7 @@ static int test_bitmap(const char *str) return ret; } -int test__bitmap_print(int subtest __maybe_unused) +int test__bitmap_print(struct test *test __maybe_unused, int subtest __maybe_unused) { TEST_ASSERT_VAL("failed to convert map", test_bitmap("1")); TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,5")); diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index 39bbb97cd30a..97937e1bc53a 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -164,7 +164,7 @@ static long long bp_count(int fd) return count; } -int test__bp_signal(int subtest __maybe_unused) +int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused) { struct sigaction sa; long long count1, count2, count3; diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c index 3b1ac6f31b15..61ecd8021f49 100644 --- a/tools/perf/tests/bp_signal_overflow.c +++ b/tools/perf/tests/bp_signal_overflow.c @@ -57,7 +57,7 @@ static long long bp_count(int fd) #define EXECUTIONS 10000 #define THRESHOLD 100 -int test__bp_signal_overflow(int subtest __maybe_unused) +int test__bp_signal_overflow(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_event_attr pe; struct sigaction sa; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 5876da126b58..4422ab636d30 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -321,7 +321,7 @@ static int check_env(void) return 0; } -int test__bpf(int i) +int test__bpf(struct test *test __maybe_unused, int i) { int err; @@ -351,7 +351,7 @@ const char *test__bpf_subtest_get_desc(int i __maybe_unused) return NULL; } -int test__bpf(int i __maybe_unused) +int test__bpf(struct test *test __maybe_unused, int i __maybe_unused) { pr_debug("Skip BPF test because BPF support is not compiled\n"); return TEST_SKIP; diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 436e22993909..0186abbad899 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -179,7 +179,7 @@ static struct test generic_tests[] = { }, { .desc = "Session topology", - .func = test_session_topology, + .func = test__session_topology, }, { .desc = "BPF filter", @@ -325,7 +325,7 @@ static int run_test(struct test *test, int subtest) } } - err = test->func(subtest); + err = test->func(test, subtest); if (!dont_fork) exit(err); } diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c index c5bb2203f5a9..c60ec916f0f2 100644 --- a/tools/perf/tests/clang.c +++ b/tools/perf/tests/clang.c @@ -33,12 +33,12 @@ const char *test__clang_subtest_get_desc(int i) } #ifndef HAVE_LIBCLANGLLVM_SUPPORT -int test__clang(int i __maybe_unused) +int test__clang(struct test *test __maybe_unused, int i __maybe_unused) { return TEST_SKIP; } #else -int test__clang(int i) +int test__clang(struct test *test __maybe_unused, int i) { if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table)) return TEST_FAIL; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 94b7c7b02bde..761c5a448c56 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -673,7 +673,7 @@ static int do_test_code_reading(bool try_kcore) return err; } -int test__code_reading(int subtest __maybe_unused) +int test__code_reading(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret; diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 4478773cdb97..199702252270 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -72,7 +72,7 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, } -int test__cpu_map_synthesize(int subtest __maybe_unused) +int test__cpu_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused) { struct cpu_map *cpus; @@ -106,7 +106,7 @@ static int cpu_map_print(const char *str) return !strcmp(buf, str); } -int test__cpu_map_print(int subtest __maybe_unused) +int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_unused) { TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1")); TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,5")); diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 8f08df5861cb..30aead42d136 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -112,7 +112,7 @@ static int dso__data_fd(struct dso *dso, struct machine *machine) return fd; } -int test__dso_data(int subtest __maybe_unused) +int test__dso_data(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machine machine; struct dso *dso; @@ -247,7 +247,7 @@ static int set_fd_limit(int n) return setrlimit(RLIMIT_NOFILE, &rlim); } -int test__dso_data_cache(int subtest __maybe_unused) +int test__dso_data_cache(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machine machine; long nr_end, nr = open_files_cnt(); @@ -307,7 +307,7 @@ int test__dso_data_cache(int subtest __maybe_unused) return 0; } -int test__dso_data_reopen(int subtest __maybe_unused) +int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machine machine; long nr_end, nr = open_files_cnt(); diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 3e56d08f7995..2a7b9b47bbcb 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -154,7 +154,7 @@ static noinline int krava_1(struct thread *thread) return krava_2(thread); } -int test__dwarf_unwind(int subtest __maybe_unused) +int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machine *machine; struct thread *thread; diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 634f20c631d8..b82b981c3259 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -213,7 +213,7 @@ static int test_times(int (attach)(struct perf_evlist *), * and checks that enabled and running times * match. */ -int test__event_times(int subtest __maybe_unused) +int test__event_times(struct test *test __maybe_unused, int subtest __maybe_unused) { int err, ret = 0; diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 63ecf21750eb..9484da2ec6b4 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -76,7 +76,7 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, return 0; } -int test__event_update(int subtest __maybe_unused) +int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_evlist *evlist; struct perf_evsel *evsel; diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index d2bea6f780f8..d32759b6e38a 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -97,7 +97,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names) #define perf_evsel__name_array_test(names) \ __perf_evsel__name_array_test(names, ARRAY_SIZE(names)) -int test__perf_evsel__roundtrip_name_test(int subtest __maybe_unused) +int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = 0, ret = 0; diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 1984b3bbfe15..5fc906d26c5c 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -32,7 +32,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, return ret; } -int test__perf_evsel__tp_sched_test(int subtest __maybe_unused) +int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch"); int ret = 0; diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 6c6a3749aaf6..e93903295532 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -13,7 +13,7 @@ static int test(struct parse_ctx *ctx, const char *e, double val2) return 0; } -int test__expr(int subtest __maybe_unused) +int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) { const char *p; const char **other; diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c index bc5982f42dc3..7d3a9e2ff897 100644 --- a/tools/perf/tests/fdarray.c +++ b/tools/perf/tests/fdarray.c @@ -26,7 +26,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE return printed + fdarray__fprintf(fda, fp); } -int test__fdarray__filter(int subtest __maybe_unused) +int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused) { int nr_fds, expected_fd[2], fd, err = TEST_FAIL; struct fdarray *fda = fdarray__new(5, 5); @@ -104,7 +104,7 @@ int test__fdarray__filter(int subtest __maybe_unused) return err; } -int test__fdarray__add(int subtest __maybe_unused) +int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct fdarray *fda = fdarray__new(2, 2); diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index d549a9f2c41b..8d19c0200cb7 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -687,7 +687,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) return err; } -int test__hists_cumulate(int subtest __maybe_unused) +int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct machines machines; diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index df9c91f49af1..755ca551b810 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -101,7 +101,7 @@ static int add_hist_entries(struct perf_evlist *evlist, return TEST_FAIL; } -int test__hists_filter(int subtest __maybe_unused) +int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct machines machines; diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index a26cbb79e988..073c9c2856bc 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -264,7 +264,7 @@ static int validate_link(struct hists *leader, struct hists *other) return __validate_link(leader, 0) || __validate_link(other, 1); } -int test__hists_link(int subtest __maybe_unused) +int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1; struct hists *hists, *first_hists; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 06e5080182d3..282d62eaebe2 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -573,7 +573,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) return err; } -int test__hists_output(int subtest __maybe_unused) +int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct machines machines; diff --git a/tools/perf/tests/is_printable_array.c b/tools/perf/tests/is_printable_array.c index a5192f6a20d7..38f765767587 100644 --- a/tools/perf/tests/is_printable_array.c +++ b/tools/perf/tests/is_printable_array.c @@ -4,7 +4,7 @@ #include "debug.h" #include "print_binary.h" -int test__is_printable_array(int subtest __maybe_unused) +int test__is_printable_array(struct test *test __maybe_unused, int subtest __maybe_unused) { char buf1[] = { 'k', 'r', 4, 'v', 'a', 0 }; char buf2[] = { 'k', 'r', 'a', 'v', 4, 0 }; diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 614e45a3c603..739428603b71 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -49,7 +49,7 @@ static int find_comm(struct perf_evlist *evlist, const char *comm) * when an event is disabled but a dummy software event is not disabled. If the * test passes %0 is returned, otherwise %-1 is returned. */ -int test__keep_tracking(int subtest __maybe_unused) +int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_unused) { struct record_opts opts = { .mmap_pages = UINT_MAX, diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c index 6cd9e5107f77..8b9d4ba06c0e 100644 --- a/tools/perf/tests/kmod-path.c +++ b/tools/perf/tests/kmod-path.c @@ -50,7 +50,7 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect) #define M(path, c, e) \ TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e)) -int test__kmod_path__parse(int subtest __maybe_unused) +int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_unused) { /* path alloc_name alloc_ext kmod comp name ext */ T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL); diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 482b5365e68d..5187b50dbafe 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -132,7 +132,7 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf, return ret; } -int test__llvm(int subtest) +int test__llvm(struct test *test __maybe_unused, int subtest) { int ret; void *obj_buf = NULL; diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 15c770856aac..bc8a70ee46d8 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -22,7 +22,7 @@ * Then it checks if the number of syscalls reported as perf events by * the kernel corresponds to the number of syscalls made. */ -int test__basic_mmap(int subtest __maybe_unused) +int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1; union perf_event *event; diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 6ea4d8a5d26b..f94a4196e7c9 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -221,7 +221,7 @@ static int mmap_events(synth_cb synth) * * by using all thread objects. */ -int test__mmap_thread_lookup(int subtest __maybe_unused) +int test__mmap_thread_lookup(struct test *test __maybe_unused, int subtest __maybe_unused) { /* perf_event__synthesize_threads synthesize */ TEST_ASSERT_VAL("failed with sythesizing all", diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 1a74dd9fd067..87265117fd7f 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -16,7 +16,7 @@ #include "debug.h" #include "stat.h" -int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused) +int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1, fd, cpu; struct cpu_map *cpus; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 9788fac91095..b6ee1c41f45d 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -14,7 +14,7 @@ #define AT_FDCWD -100 #endif -int test__syscall_openat_tp_fields(int subtest __maybe_unused) +int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest __maybe_unused) { struct record_opts opts = { .target = { diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index e44506e21ee7..85bb6729d303 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -10,7 +10,7 @@ #include "debug.h" #include "tests.h" -int test__openat_syscall_event(int subtest __maybe_unused) +int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1, fd; struct perf_evsel *evsel; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 812a053d1941..0f0b025faa4b 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1810,7 +1810,7 @@ static int test_pmu_events(void) return ret; } -int test__parse_events(int subtest __maybe_unused) +int test__parse_events(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret1, ret2 = 0; diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index c6207db09f12..91867dcc39f0 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -68,7 +68,7 @@ struct test_attr_event { * * Return: %0 on success, %-1 if the test fails. */ -int test__parse_no_sample_id_all(int subtest __maybe_unused) +int test__parse_no_sample_id_all(struct test *test __maybe_unused, int subtest __maybe_unused) { int err; diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c index 665ecc19671c..bf2517d6de70 100644 --- a/tools/perf/tests/perf-hooks.c +++ b/tools/perf/tests/perf-hooks.c @@ -27,7 +27,7 @@ static void the_hook(void *_hook_flags) *p = 0; } -int test__perf_hooks(int subtest __maybe_unused) +int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused) { int hook_flags = 0; diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index d37cd9588cc0..19b650064b70 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -37,7 +37,7 @@ static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp) return cpu; } -int test__PERF_RECORD(int subtest __maybe_unused) +int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unused) { struct record_opts opts = { .target = { diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index a6d7aef30030..9f7f589f9c54 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -135,7 +135,7 @@ static struct list_head *test_terms_list(void) return &terms; } -int test__pmu(int subtest __maybe_unused) +int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused) { char *format = test_format_dir_get(); LIST_HEAD(formats); diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c index fa79509da535..598a7e058ad4 100644 --- a/tools/perf/tests/python-use.c +++ b/tools/perf/tests/python-use.c @@ -9,7 +9,7 @@ extern int verbose; -int test__python_use(int subtest __maybe_unused) +int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused) { char *cmd; int ret; diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index bac5c3885b3b..6d028f42b3cf 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -292,7 +292,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) * checks sample format bits separately and together. If the test passes %0 is * returned, otherwise %-1 is returned. */ -int test__sample_parsing(int subtest __maybe_unused) +int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused) { const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15}; u64 sample_type; diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index d80171526f6f..a9903d9b8bc2 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -71,7 +71,7 @@ static int search_cached_probe(const char *target, return ret; } -int test__sdt_event(int subtests __maybe_unused) +int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused) { int ret = TEST_FAIL; char __tempdir[] = "./test-buildid-XXXXXX"; @@ -109,7 +109,7 @@ int test__sdt_event(int subtests __maybe_unused) return ret; } #else -int test__sdt_event(int subtests __maybe_unused) +int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused) { pr_debug("Skip SDT event test because SDT support is not compiled\n"); return TEST_SKIP; diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c index 6a20ff2326bb..7f988a939036 100644 --- a/tools/perf/tests/stat.c +++ b/tools/perf/tests/stat.c @@ -45,7 +45,7 @@ static int process_stat_config_event(struct perf_tool *tool __maybe_unused, return 0; } -int test__synthesize_stat_config(int subtest __maybe_unused) +int test__synthesize_stat_config(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_stat_config stat_config = { .aggr_mode = AGGR_CORE, @@ -75,7 +75,7 @@ static int process_stat_event(struct perf_tool *tool __maybe_unused, return 0; } -int test__synthesize_stat(int subtest __maybe_unused) +int test__synthesize_stat(struct test *test __maybe_unused, int subtest __maybe_unused) { struct perf_counts_values count; @@ -101,7 +101,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, return 0; } -int test__synthesize_stat_round(int subtest __maybe_unused) +int test__synthesize_stat_round(struct test *test __maybe_unused, int subtest __maybe_unused) { TEST_ASSERT_VAL("failed to synthesize stat_config", !perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL, diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 828494db4a19..d88511f6072c 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -124,7 +124,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) return err; } -int test__sw_clock_freq(int subtest __maybe_unused) +int test__sw_clock_freq(struct test *test __maybe_unused, int subtest __maybe_unused) { int ret; diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 65474fd80da7..2acd78555192 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -306,7 +306,7 @@ static int process_events(struct perf_evlist *evlist, * evsel->system_wide and evsel->tracking flags (respectively) with other events * sometimes enabled or disabled. */ -int test__switch_tracking(int subtest __maybe_unused) +int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused) { const char *sched_switch = "sched:sched_switch"; struct switch_tracking switch_tracking = { .tids = NULL, }; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index cf00ebad2ef5..f0881d0dd9c9 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -32,7 +32,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, * if the number of exit event reported by the kernel is 1 or not * in order to check the kernel returns correct number of event. */ -int test__task_exit(int subtest __maybe_unused) +int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1; union perf_event *event; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 577363809c9b..bc207ac48fde 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -28,7 +28,7 @@ enum { struct test { const char *desc; - int (*func)(int subtest); + int (*func)(struct test *test, int subtest); struct { bool skip_if_fail; int (*get_nr)(void); @@ -38,67 +38,67 @@ struct test { }; /* Tests */ -int test__vmlinux_matches_kallsyms(int subtest); -int test__openat_syscall_event(int subtest); -int test__openat_syscall_event_on_all_cpus(int subtest); -int test__basic_mmap(int subtest); -int test__PERF_RECORD(int subtest); -int test__perf_evsel__roundtrip_name_test(int subtest); -int test__perf_evsel__tp_sched_test(int subtest); -int test__syscall_openat_tp_fields(int subtest); -int test__pmu(int subtest); -int test__attr(int subtest); -int test__dso_data(int subtest); -int test__dso_data_cache(int subtest); -int test__dso_data_reopen(int subtest); -int test__parse_events(int subtest); -int test__hists_link(int subtest); -int test__python_use(int subtest); -int test__bp_signal(int subtest); -int test__bp_signal_overflow(int subtest); -int test__task_exit(int subtest); -int test__sw_clock_freq(int subtest); -int test__code_reading(int subtest); -int test__sample_parsing(int subtest); -int test__keep_tracking(int subtest); -int test__parse_no_sample_id_all(int subtest); -int test__dwarf_unwind(int subtest); -int test__expr(int subtest); -int test__hists_filter(int subtest); -int test__mmap_thread_lookup(int subtest); -int test__thread_mg_share(int subtest); -int test__hists_output(int subtest); -int test__hists_cumulate(int subtest); -int test__switch_tracking(int subtest); -int test__fdarray__filter(int subtest); -int test__fdarray__add(int subtest); -int test__kmod_path__parse(int subtest); -int test__thread_map(int subtest); -int test__llvm(int subtest); +int test__vmlinux_matches_kallsyms(struct test *test, int subtest); +int test__openat_syscall_event(struct test *test, int subtest); +int test__openat_syscall_event_on_all_cpus(struct test *test, int subtest); +int test__basic_mmap(struct test *test, int subtest); +int test__PERF_RECORD(struct test *test, int subtest); +int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest); +int test__perf_evsel__tp_sched_test(struct test *test, int subtest); +int test__syscall_openat_tp_fields(struct test *test, int subtest); +int test__pmu(struct test *test, int subtest); +int test__attr(struct test *test, int subtest); +int test__dso_data(struct test *test, int subtest); +int test__dso_data_cache(struct test *test, int subtest); +int test__dso_data_reopen(struct test *test, int subtest); +int test__parse_events(struct test *test, int subtest); +int test__hists_link(struct test *test, int subtest); +int test__python_use(struct test *test, int subtest); +int test__bp_signal(struct test *test, int subtest); +int test__bp_signal_overflow(struct test *test, int subtest); +int test__task_exit(struct test *test, int subtest); +int test__sw_clock_freq(struct test *test, int subtest); +int test__code_reading(struct test *test, int subtest); +int test__sample_parsing(struct test *test, int subtest); +int test__keep_tracking(struct test *test, int subtest); +int test__parse_no_sample_id_all(struct test *test, int subtest); +int test__dwarf_unwind(struct test *test, int subtest); +int test__expr(struct test *test, int subtest); +int test__hists_filter(struct test *test, int subtest); +int test__mmap_thread_lookup(struct test *test, int subtest); +int test__thread_mg_share(struct test *test, int subtest); +int test__hists_output(struct test *test, int subtest); +int test__hists_cumulate(struct test *test, int subtest); +int test__switch_tracking(struct test *test, int subtest); +int test__fdarray__filter(struct test *test, int subtest); +int test__fdarray__add(struct test *test, int subtest); +int test__kmod_path__parse(struct test *test, int subtest); +int test__thread_map(struct test *test, int subtest); +int test__llvm(struct test *test, int subtest); const char *test__llvm_subtest_get_desc(int subtest); int test__llvm_subtest_get_nr(void); -int test__bpf(int subtest); +int test__bpf(struct test *test, int subtest); const char *test__bpf_subtest_get_desc(int subtest); int test__bpf_subtest_get_nr(void); -int test_session_topology(int subtest); -int test__thread_map_synthesize(int subtest); -int test__thread_map_remove(int subtest); -int test__cpu_map_synthesize(int subtest); -int test__synthesize_stat_config(int subtest); -int test__synthesize_stat(int subtest); -int test__synthesize_stat_round(int subtest); -int test__event_update(int subtest); -int test__event_times(int subtest); -int test__backward_ring_buffer(int subtest); -int test__cpu_map_print(int subtest); -int test__sdt_event(int subtest); -int test__is_printable_array(int subtest); -int test__bitmap_print(int subtest); -int test__perf_hooks(int subtest); -int test__clang(int subtest); +int test__session_topology(struct test *test, int subtest); +int test__thread_map_synthesize(struct test *test, int subtest); +int test__thread_map_remove(struct test *test, int subtest); +int test__cpu_map_synthesize(struct test *test, int subtest); +int test__synthesize_stat_config(struct test *test, int subtest); +int test__synthesize_stat(struct test *test, int subtest); +int test__synthesize_stat_round(struct test *test, int subtest); +int test__event_update(struct test *test, int subtest); +int test__event_times(struct test *test, int subtest); +int test__backward_ring_buffer(struct test *test, int subtest); +int test__cpu_map_print(struct test *test, int subtest); +int test__sdt_event(struct test *test, int subtest); +int test__is_printable_array(struct test *test, int subtest); +int test__bitmap_print(struct test *test, int subtest); +int test__perf_hooks(struct test *test, int subtest); +int test__clang(struct test *test, int subtest); const char *test__clang_subtest_get_desc(int subtest); int test__clang_subtest_get_nr(void); -int test__unit_number__scnprint(int subtest); +int test__unit_number__scnprint(struct test *test, int subtest); bool test__bp_signal_is_supported(void); diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index a63d6945807b..b3423c744f46 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -9,7 +9,7 @@ #define NAME (const char *) "perf" #define NAMEUL (unsigned long) NAME -int test__thread_map(int subtest __maybe_unused) +int test__thread_map(struct test *test __maybe_unused, int subtest __maybe_unused) { struct thread_map *map; @@ -76,7 +76,7 @@ static int process_event(struct perf_tool *tool __maybe_unused, return 0; } -int test__thread_map_synthesize(int subtest __maybe_unused) +int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused) { struct thread_map *threads; @@ -95,7 +95,7 @@ int test__thread_map_synthesize(int subtest __maybe_unused) return 0; } -int test__thread_map_remove(int subtest __maybe_unused) +int test__thread_map_remove(struct test *test __maybe_unused, int subtest __maybe_unused) { struct thread_map *threads; char *str; diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c index 76686dd6f5ec..b9c7f58db6c4 100644 --- a/tools/perf/tests/thread-mg-share.c +++ b/tools/perf/tests/thread-mg-share.c @@ -4,7 +4,7 @@ #include "map.h" #include "debug.h" -int test__thread_mg_share(int subtest __maybe_unused) +int test__thread_mg_share(struct test *test __maybe_unused, int subtest __maybe_unused) { struct machines machines; struct machine *machine; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 803f893550d6..19b0561fd6f6 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -86,7 +86,7 @@ static int check_cpu_topology(char *path, struct cpu_map *map) return 0; } -int test_session_topology(int subtest __maybe_unused) +int test__session_topology(struct test *test __maybe_unused, int subtest __maybe_unused) { char path[PATH_MAX]; struct cpu_map *map; diff --git a/tools/perf/tests/unit_number__scnprintf.c b/tools/perf/tests/unit_number__scnprintf.c index 44589de084b8..15cd1cf8c129 100644 --- a/tools/perf/tests/unit_number__scnprintf.c +++ b/tools/perf/tests/unit_number__scnprintf.c @@ -5,7 +5,7 @@ #include "units.h" #include "debug.h" -int test__unit_number__scnprint(int subtest __maybe_unused) +int test__unit_number__scnprint(struct test *t __maybe_unused, int subtest __maybe_unused) { struct { u64 n; diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 8456175fc234..86cb8868f67f 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -11,7 +11,7 @@ #define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x)) -int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) +int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1; struct rb_node *nd; From 1209b273a25ee60a267b606bea77e068b8556a8d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 3 Aug 2017 16:49:15 -0300 Subject: [PATCH 175/253] perf test: Add infrastructure to run shell based tests To allow testing by directly using perf tools in scripts, checking that the effects on the system are the ones expected and that the output produced is as well the desired one. For instance, adding a probe at a well known location with 'perf probe', then checking that the results from using that probe to record are the desired ones, etc. The next csets will introduce tests using this new testing infrastructure. The scripts should return 0 for Ok, 1 for FAIL and 2 for SKIP. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-swbpn7amrjqffh83lsr39s9p@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 170 +++++++++++++++++++++++++++++++- tools/perf/tests/tests.h | 1 + 2 files changed, 170 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 0186abbad899..2bd158e3c02f 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -6,7 +6,10 @@ #include #include #include +#include +#include #include +#include #include "builtin.h" #include "hist.h" #include "intlist.h" @@ -14,8 +17,10 @@ #include "debug.h" #include "color.h" #include +#include "string2.h" #include "symbol.h" #include +#include static bool dont_fork; @@ -383,12 +388,143 @@ static int test_and_print(struct test *t, bool force_skip, int subtest) return err; } +static const char *shell_test__description(char *description, size_t size, + const char *path, const char *name) +{ + FILE *fp; + char filename[PATH_MAX]; + + path__join(filename, sizeof(filename), path, name); + fp = fopen(filename, "r"); + if (!fp) + return NULL; + + description = fgets(description, size, fp); + fclose(fp); + + return description ? trim(description + 1) : NULL; +} + +#define for_each_shell_test(dir, ent) \ + while ((ent = readdir(dir)) != NULL) \ + if (ent->d_type == DT_REG && ent->d_name[0] != '.') + +static const char *shell_tests__dir(char *path, size_t size) +{ + const char *devel_dirs[] = { "./tools/perf/tests", "./tests", }; + char *exec_path; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(devel_dirs); ++i) { + struct stat st; + if (!lstat(devel_dirs[i], &st)) { + scnprintf(path, size, "%s/shell", devel_dirs[i]); + if (!lstat(devel_dirs[i], &st)) + return path; + } + } + + /* Then installed path. */ + exec_path = get_argv_exec_path(); + scnprintf(path, size, "%s/tests/shell", exec_path); + free(exec_path); + return path; +} + +static int shell_tests__max_desc_width(void) +{ + DIR *dir; + struct dirent *ent; + char path_dir[PATH_MAX]; + const char *path = shell_tests__dir(path_dir, sizeof(path_dir)); + int width = 0; + + if (path == NULL) + return -1; + + dir = opendir(path); + if (!dir) + return -1; + + for_each_shell_test(dir, ent) { + char bf[256]; + const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name); + + if (desc) { + int len = strlen(desc); + + if (width < len) + width = len; + } + } + + closedir(dir); + return width; +} + +struct shell_test { + const char *dir; + const char *file; +}; + +static int shell_test__run(struct test *test, int subdir __maybe_unused) +{ + int err; + char script[PATH_MAX]; + struct shell_test *st = test->priv; + + path__join(script, sizeof(script), st->dir, st->file); + + err = system(script); + if (!err) + return TEST_OK; + + return WEXITSTATUS(err) == 2 ? TEST_SKIP : TEST_FAIL; +} + +static int run_shell_tests(int argc, const char *argv[], int i, int width) +{ + DIR *dir; + struct dirent *ent; + char path_dir[PATH_MAX]; + struct shell_test st = { + .dir = shell_tests__dir(path_dir, sizeof(path_dir)), + }; + + if (st.dir == NULL) + return -1; + + dir = opendir(st.dir); + if (!dir) + return -1; + + for_each_shell_test(dir, ent) { + int curr = i++; + char desc[256]; + struct test test = { + .desc = shell_test__description(desc, sizeof(desc), st.dir, ent->d_name), + .func = shell_test__run, + .priv = &st, + }; + + if (!perf_test__matches(&test, curr, argc, argv)) + continue; + + st.file = ent->d_name; + pr_info("%2d: %-*s:", i, width, test.desc); + test_and_print(&test, false, -1); + } + + closedir(dir); + return 0; +} + static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) { struct test *t; unsigned int j; int i = 0; - int width = 0; + int width = shell_tests__max_desc_width(); for_each_test(j, t) { int len = strlen(t->desc); @@ -455,6 +591,36 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) } } + return run_shell_tests(argc, argv, i, width); +} + +static int perf_test__list_shell(int argc, const char **argv, int i) +{ + DIR *dir; + struct dirent *ent; + char path_dir[PATH_MAX]; + const char *path = shell_tests__dir(path_dir, sizeof(path_dir)); + + if (path == NULL) + return -1; + + dir = opendir(path); + if (!dir) + return -1; + + for_each_shell_test(dir, ent) { + char bf[256]; + const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name); + + ++i; + + if (argc > 1 && !strstr(desc, argv[1])) + continue; + + pr_info("%2d: %s\n", i, desc); + } + + closedir(dir); return 0; } @@ -473,6 +639,8 @@ static int perf_test__list(int argc, const char **argv) pr_info("%2d: %s\n", i, t->desc); } + perf_test__list_shell(argc, argv, i); + return 0; } diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index bc207ac48fde..c46ae818aac8 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -35,6 +35,7 @@ struct test { const char *(*get_desc)(int subtest); } subtest; bool (*is_supported)(void); + void *priv; }; /* Tests */ From 6d02acc1918094de12f885b35db9477e579b6bd0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Aug 2017 11:16:40 -0300 Subject: [PATCH 176/253] perf test: Make 'list' use same filtering code as main 'perf test' Before: # perf test Synth 39: Synthesize thread map : Ok 41: Synthesize cpu map : Ok 42: Synthesize stat config : Ok 43: Synthesize stat : Ok 44: Synthesize stat round : Ok 45: Synthesize attr update : Ok # perf test list Synth # After: # perf test Synth 39: Synthesize thread map : Ok 41: Synthesize cpu map : Ok 42: Synthesize stat config : Ok 43: Synthesize stat : Ok 44: Synthesize stat round : Ok 45: Synthesize attr update : Ok # perf test list Synth 39: Synthesize thread map 41: Synthesize cpu map 42: Synthesize stat config 43: Synthesize stat 44: Synthesize stat round 45: Synthesize attr update # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-v95tqqzuwawsmds3zn2mosje@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 2bd158e3c02f..9ecc44e68990 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -609,15 +609,16 @@ static int perf_test__list_shell(int argc, const char **argv, int i) return -1; for_each_shell_test(dir, ent) { + int curr = i++; char bf[256]; - const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name); + struct test t = { + .desc = shell_test__description(bf, sizeof(bf), path, ent->d_name), + }; - ++i; - - if (argc > 1 && !strstr(desc, argv[1])) + if (!perf_test__matches(&t, curr, argc, argv)) continue; - pr_info("%2d: %s\n", i, desc); + pr_info("%2d: %s\n", i, t.desc); } closedir(dir); @@ -631,9 +632,10 @@ static int perf_test__list(int argc, const char **argv) int i = 0; for_each_test(j, t) { - ++i; + int curr = i++; - if (argc > 1 && !strstr(t->desc, argv[1])) + if (!perf_test__matches(t, curr, argc, argv) || + (t->is_supported && !t->is_supported())) continue; pr_info("%2d: %s\n", i, t->desc); @@ -668,7 +670,7 @@ int cmd_test(int argc, const char **argv) argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) - return perf_test__list(argc, argv); + return perf_test__list(argc - 1, argv + 1); symbol_conf.priv_size = sizeof(int); symbol_conf.sort_by_name = true; From a3534842ddd04675abc67e80432aacc01670cca8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 3 Aug 2017 16:54:53 -0300 Subject: [PATCH 177/253] perf test shell: Add 'probe_vfs_getname' shell test First perf shell test: # perf test vfs_getname 60: Add vfs_getname probe to get syscall args filenames: Ok # In verbose mode: # perf test -v vfs_getname 60: Add vfs_getname probe to get syscall args filenames: --- start --- test child forked, pid 19146 Added new event: probe:vfs_getname (on getname_flags:72 with pathname=result->name:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname -aR sleep 1 test child finished with 0 ---- end ---- Add vfs_getname probe to get syscall args filenames: Ok # And if the vmlinux file is not found: # mv ../build/v4.12.0-rc6+/vmlinux ../build/v4.12.0-rc6+/vmlinux.hidden # perf test vfs_getname 60: Add vfs_getname probe to get syscall args filenames: Skip # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-8f3n22c1yn516ev30s603ow2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/probe_vfs_getname.sh | 30 +++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100755 tools/perf/tests/shell/probe_vfs_getname.sh diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh new file mode 100755 index 000000000000..d5f5248de927 --- /dev/null +++ b/tools/perf/tests/shell/probe_vfs_getname.sh @@ -0,0 +1,30 @@ +# Add vfs_getname probe to get syscall args filenames +# +# Arnaldo Carvalho de Melo , 2017 + +perf probe -l | grep -q probe:vfs_getname +had_vfs_getname=$? + +cleanup_probe_vfs_getname() { + if [ $had_vfs_getname -eq 1 ] ; then + perf probe -q -d probe:vfs_getname + fi +} + +add_probe_vfs_getname() { + local verbose=$1 + if [ $had_vfs_getname -eq 1 ] ; then + line=$(perf probe -L getname_flags | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') + perf probe $verbose "vfs_getname=getname_flags:${line} pathname=result->name:string" + fi +} + +skip_if_no_debuginfo() { + add_probe_vfs_getname -v 2>&1 | grep -q "^Failed to find the path for kernel" && return 2 + return 1 +} + +add_probe_vfs_getname || skip_if_no_debuginfo +err=$? +cleanup_probe_vfs_getname +exit $err From 122e0b947052f6106595fa29d63d514d2ebcdad9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Aug 2017 12:19:44 -0300 Subject: [PATCH 178/253] perf test shell: Install shell tests Now that we have shell tests, install them. Developers don't need this pass, as 'perf test' will look first at the in tree scripts at tools/perf/tests/shell/. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-j21u4v0jsehi0lpwqwjb4j45@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index c1f7884979e2..eb1356774c7d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -760,7 +760,9 @@ install-tests: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ - $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' + $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ + $(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell' install-bin: install-tools install-tests install-traceevent-plugins From 5ce669a59503f7c05c4648c70fe72bbe42613743 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Aug 2017 14:18:29 -0300 Subject: [PATCH 179/253] perf test shell: Move vfs_getname probe function to lib Multiple tests will be able to reuse these functions, to test things like perf report, 'trace', etc, using this probe. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-48xagvozhouhyi8fjota6o2d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 4 ++- .../perf/tests/shell/lib/probe_vfs_getname.sh | 28 +++++++++++++++++++ tools/perf/tests/shell/probe_vfs_getname.sh | 22 +-------------- 3 files changed, 32 insertions(+), 22 deletions(-) create mode 100644 tools/perf/tests/shell/lib/probe_vfs_getname.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index eb1356774c7d..70ddc65f898d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -762,7 +762,9 @@ install-tests: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ - $(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell' + $(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \ + $(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib' install-bin: install-tools install-tests install-traceevent-plugins diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh new file mode 100644 index 000000000000..42308040f882 --- /dev/null +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -0,0 +1,28 @@ +# Arnaldo Carvalho de Melo , 2017 + +perf probe -l | grep -q probe:vfs_getname +had_vfs_getname=$? + +cleanup_probe_vfs_getname() { + if [ $had_vfs_getname -eq 1 ] ; then + perf probe -q -d probe:vfs_getname + fi +} + +add_probe_vfs_getname() { + local verbose=$1 + if [ $had_vfs_getname -eq 1 ] ; then + line=$(perf probe -L getname_flags | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') + perf probe $verbose "vfs_getname=getname_flags:${line} pathname=result->name:string" + fi +} + +skip_if_no_debuginfo() { + add_probe_vfs_getname -v 2>&1 | grep -q "^Failed to find the path for kernel" && return 2 + return 1 +} + +skip_if_no_debuginfo() { + add_probe_vfs_getname -v 2>&1 | grep -q "^Failed to find the path for kernel" && return 2 + return 1 +} diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh index d5f5248de927..c8380137beef 100755 --- a/tools/perf/tests/shell/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/probe_vfs_getname.sh @@ -2,27 +2,7 @@ # # Arnaldo Carvalho de Melo , 2017 -perf probe -l | grep -q probe:vfs_getname -had_vfs_getname=$? - -cleanup_probe_vfs_getname() { - if [ $had_vfs_getname -eq 1 ] ; then - perf probe -q -d probe:vfs_getname - fi -} - -add_probe_vfs_getname() { - local verbose=$1 - if [ $had_vfs_getname -eq 1 ] ; then - line=$(perf probe -L getname_flags | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') - perf probe $verbose "vfs_getname=getname_flags:${line} pathname=result->name:string" - fi -} - -skip_if_no_debuginfo() { - add_probe_vfs_getname -v 2>&1 | grep -q "^Failed to find the path for kernel" && return 2 - return 1 -} +. $(dirname $0)/lib/probe_vfs_getname.sh add_probe_vfs_getname || skip_if_no_debuginfo err=$? From 6060c7264ded3f6fa17e74bca3e192ce477e8063 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Aug 2017 14:39:37 -0300 Subject: [PATCH 180/253] perf test shell: Add test using probe:vfs_getname and verifying results This test uses the 'perf test shell' library to add probe:vfs_getname to the system then use it with 'perf record' using 'touch' to write to a temp file, then checks that that was captured by the vfs_getname probe in the generated perf.data file, with the temp file name as the pathname argument. Using it: # perf test "Use vfs_getname" 60: Use vfs_getname probe to get syscall args filenames: Ok # perf test -v "Use vfs_getname" 60: Use vfs_getname probe to get syscall args filenames: --- start --- test child forked, pid 16414 Added new event: probe:vfs_getname (on getname_flags:72 with pathname=result->name:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname -aR sleep 1 Recording open file: [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.022 MB /tmp/vaca.perf.data.QZsn7 (13 samples) ] Looking at perf.data file for vfs_getname records for the file we touched: touch 16421 [002] 1255152.879561: probe:vfs_getname: (ffffffffa626e608) pathname="/tmp/vaca.l10SL" test child finished with 0 ---- end ---- Use vfs_getname probe to get syscall args filenames: Ok # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-t555fnhbcbxnukltk23dqxur@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../shell/record+script_probe_vfs_getname.sh | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100755 tools/perf/tests/shell/record+script_probe_vfs_getname.sh diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh new file mode 100755 index 000000000000..2725c5db699a --- /dev/null +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -0,0 +1,37 @@ +# Use vfs_getname probe to get syscall args filenames + +# Uses the 'perf test shell' library to add probe:vfs_getname to the system +# then use it with 'perf record' using 'touch' to write to a temp file, then +# checks that that was captured by the vfs_getname probe in the generated +# perf.data file, with the temp file name as the pathname argument. + +# Arnaldo Carvalho de Melo , 2017 + +. $(dirname $0)/lib/probe_vfs_getname.sh + +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +file=$(mktemp /tmp/temporary_file.XXXXX) + +record_open_file() { + echo "Recording open file:" + perf record -o ${perfdata} -e probe:vfs_getname touch $file +} + +perf_script_filenames() { + echo "Looking at perf.data file for vfs_getname records for the file we touched:" + perf script -i ${perfdata} | \ + egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname: +\([[:xdigit:]]+\) +pathname=\"${file}\"" +} + +add_probe_vfs_getname || skip_if_no_debuginfo +err=$? +if [ $err -ne 0 ] ; then + exit $err +fi + +record_open_file && perf_script_filenames +err=$? +rm -f ${perfdata} +rm -f ${file} +cleanup_probe_vfs_getname +exit $err From e498f336d66f804f87787b2ec31821e349a37b7b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Aug 2017 17:06:26 -0300 Subject: [PATCH 181/253] perf test shell: Add test using vfs_getname + 'perf trace' Uses the 'perf test shell' library to add probe:vfs_getname to the system then use it with 'perf trace' using 'touch' to write to a temp file, then checks that that was captured by the vfs_getname was used by 'perf trace', that already handles "probe:vfs_getname" if present, and used in the "open" syscall "filename" argument beautifier. Testing it: # perf test "trace + vfs_getname" 61: Check open filename arg using perf trace + vfs_getname: Ok # # perf test -v "trace + vfs_getname" 61: Check open filename arg using perf trace + vfs_getname: --- start --- test child forked, pid 30846 Added new event: probe:vfs_getname (on getname_flags:72 with pathname=result->name:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname -aR sleep 1 2.237 ( 0.012 ms): touch/30855 open(filename: /tmp/temporary_file.kmoWQ, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 test child finished with 0 ---- end ---- Check open filename arg using perf trace + vfs_getname: Ok # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-j02nobfvvn9c7yrphdsnbqx0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../tests/shell/trace+probe_vfs_getname.sh | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100755 tools/perf/tests/shell/trace+probe_vfs_getname.sh diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh new file mode 100755 index 000000000000..d0fba0c5e354 --- /dev/null +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -0,0 +1,31 @@ +# Check open filename arg using perf trace + vfs_getname + +# Uses the 'perf test shell' library to add probe:vfs_getname to the system +# then use it with 'perf trace' using 'touch' to write to a temp file, then +# checks that that was captured by the vfs_getname was used by 'perf trace', +# that already handles "probe:vfs_getname" if present, and used in the +# "open" syscall "filename" argument beautifier. + +# Arnaldo Carvalho de Melo , 2017 + +. $(dirname $0)/lib/probe_vfs_getname.sh + +file=$(mktemp /tmp/temporary_file.XXXXX) + +trace_open_vfs_getname() { + perf trace -e open touch $file 2>&1 | \ + egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open\(filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" +} + + +add_probe_vfs_getname || skip_if_no_debuginfo +err=$? +if [ $err -ne 0 ] ; then + exit $err +fi + +trace_open_vfs_getname +err=$? +rm -f ${file} +cleanup_probe_vfs_getname +exit $err From 80c345b255cbb4e9dfb193bf0bf5536217237f6a Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Sun, 6 Aug 2017 23:24:34 +0200 Subject: [PATCH 182/253] perf util: Take elf_name as const string in dso__demangle_sym The input string is not modified and thus can be passed in as a pointer to const data. Signed-off-by: Milian Wolff Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20170806212446.24925-3-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 +- tools/perf/util/symbol-minimal.c | 2 +- tools/perf/util/symbol.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 502505cf236a..7cf18f14e152 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -391,7 +391,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * return 0; } -char *dso__demangle_sym(struct dso *dso, int kmodule, char *elf_name) +char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name) { return demangle_sym(dso, kmodule, elf_name); } diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 40bf5d4c0bfd..1a5aa35b0100 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -377,7 +377,7 @@ void symbol__elf_init(void) char *dso__demangle_sym(struct dso *dso __maybe_unused, int kmodule __maybe_unused, - char *elf_name __maybe_unused) + const char *elf_name __maybe_unused) { return NULL; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 41ebba9a2eb2..f0b08810d7fa 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -306,7 +306,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map); -char *dso__demangle_sym(struct dso *dso, int kmodule, char *elf_name); +char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel); void symbols__insert(struct rb_root *symbols, struct symbol *sym); From d964b1cdbd94f359f1f65f81440be84ceb45978e Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Sun, 6 Aug 2017 23:24:45 +0200 Subject: [PATCH 183/253] perf srcline: Do not consider empty files as valid srclines Sometimes we get a non-null, but empty, string for the filename from bfd. This then results in srclines of the form ":0", which is different from the canonical SRCLINE_UNKNOWN in the form "??:0". Set the file to NULL if it is empty to fix this. Signed-off-by: Milian Wolff Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20170806212446.24925-14-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/srcline.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index ebc88a74e67b..ed8e8d2de942 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -155,6 +155,9 @@ static void find_address_in_section(bfd *abfd, asection *section, void *data) a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma, &a2l->filename, &a2l->funcname, &a2l->line); + + if (a2l->filename && !strlen(a2l->filename)) + a2l->filename = NULL; } static struct a2l_data *addr2line_init(const char *path) @@ -248,6 +251,9 @@ static int addr2line(const char *dso_name, u64 addr, &a2l->funcname, &a2l->line) && cnt++ < MAX_INLINE_NEST) { + if (a2l->filename && !strlen(a2l->filename)) + a2l->filename = NULL; + if (node != NULL) { if (inline_list__append_dso_a2l(dso, node)) return 0; From 9ad4652b66f19a60f07e63b942b80b5c2d7465bf Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 3 Aug 2017 15:49:02 +0200 Subject: [PATCH 184/253] perf record: Fix wrong size in perf_record_mmap for last kernel module During work on perf report for s390 I ran into the following issue: 0 0x318 [0x78]: PERF_RECORD_MMAP -1/0: [0x3ff804d6990(0xfffffc007fb2966f) @ 0]: x /lib/modules/4.12.0perf1+/kernel/drivers/s390/net/qeth_l2.ko This is a PERF_RECORD_MMAP entry of the perf.data file with an invalid module size for qeth_l2.ko (the s390 ethernet device driver). Even a mainframe does not have 0xfffffc007fb2966f bytes of main memory. It turned out that this wrong size is created by the perf record command. What happens is this function call sequence from __cmd_record(): perf_session__new(): perf_session__create_kernel_maps(): machine__create_kernel_maps(): machine__create_modules(): Creates map for all loaded kernel modules. modules__parse(): Reads /proc/modules and extracts module name and load address (1st and last column) machine__create_module(): Called for every module found in /proc/modules. Creates a new map for every module found and enters module name and start address into the map. Since the module end address is unknown it is set to zero. This ends up with a kernel module map list sorted by module start addresses. All module end addresses are zero. Last machine__create_kernel_maps() calls function map_groups__fixup_end(). This function iterates through the maps and assigns each map entry's end address the successor map entry start address. The last entry of the map group has no successor, so ~0 is used as end to consume the remaining memory. Later __cmd_record calls function record__synthesize() which in turn calls perf_event__synthesize_kernel_mmap() and perf_event__synthesize_modules() to create PERF_REPORT_MMAP entries into the perf.data file. On s390 this results in the last module qeth_l2.ko (which has highest start address, see module table: [root@s8360047 perf]# cat /proc/modules qeth_l2 86016 1 - Live 0x000003ff804d6000 qeth 266240 1 qeth_l2, Live 0x000003ff80296000 ccwgroup 24576 1 qeth, Live 0x000003ff80218000 vmur 36864 0 - Live 0x000003ff80182000 qdio 143360 2 qeth_l2,qeth, Live 0x000003ff80002000 [root@s8360047 perf]# ) to be the last entry and its map has an end address of ~0. When the PERF_RECORD_MMAP entry is created for kernel module qeth_l2.ko its start address and length is written. The length is calculated in line: event->mmap.len = pos->end - pos->start; and results in 0xffffffffffffffff - 0x3ff804d6990(*) = 0xfffffc007fb2966f (*) On s390 the module start address is actually determined by a __weak function named arch__fix_module_text_start() in machine__create_module(). I think this improvable. We can use the module size (2nd column of /proc/modules) to get each loaded kernel module size and calculate its end address. Only for map entries which do not have a valid end address (end is still zero) we can use the heuristic we have now, that is use successor start address or ~0. Signed-off-by: Thomas-Mich Richter Reviewed-by: Arnaldo Carvalho de Melo Cc: Hendrik Brueckner Cc: Thomas-Mich Richter Cc: Zvonko Kosic LPU-Reference: 20170803134902.47207-2-tmricht@linux.vnet.ibm.com Link: http://lkml.kernel.org/n/tip-nmoqij5b5vxx7rq2ckwu8iaj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 4 +++- tools/perf/util/symbol-elf.c | 2 +- tools/perf/util/symbol.c | 21 ++++++++++++++------- tools/perf/util/symbol.h | 2 +- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index d4df353051af..5c8eacaca4f4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1138,7 +1138,8 @@ int __weak arch__fix_module_text_start(u64 *start __maybe_unused, return 0; } -static int machine__create_module(void *arg, const char *name, u64 start) +static int machine__create_module(void *arg, const char *name, u64 start, + u64 size) { struct machine *machine = arg; struct map *map; @@ -1149,6 +1150,7 @@ static int machine__create_module(void *arg, const char *name, u64 start) map = machine__findnew_module_map(machine, start, name); if (map == NULL) return -1; + map->end = start + size; dso__kernel_module_get_build_id(map->dso, machine->root_dir); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 7cf18f14e152..98deabb9b47e 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1442,7 +1442,7 @@ static int kcore_copy__parse_kallsyms(struct kcore_copy_info *kci, static int kcore_copy__process_modules(void *arg, const char *name __maybe_unused, - u64 start) + u64 start, u64 size __maybe_unused) { struct kcore_copy_info *kci = arg; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 971b990557b4..5909ee4c7ade 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -233,7 +233,8 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) goto out_unlock; for (next = map__next(curr); next; next = map__next(curr)) { - curr->end = next->start; + if (!curr->end) + curr->end = next->start; curr = next; } @@ -241,7 +242,8 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) * We still haven't the actual symbols, so guess the * last map final address. */ - curr->end = ~0ULL; + if (!curr->end) + curr->end = ~0ULL; out_unlock: pthread_rwlock_unlock(&maps->lock); @@ -552,7 +554,7 @@ void dso__sort_by_name(struct dso *dso, enum map_type type) int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, - u64 start)) + u64 start, u64 size)) { char *line = NULL; size_t n; @@ -565,8 +567,8 @@ int modules__parse(const char *filename, void *arg, while (1) { char name[PATH_MAX]; - u64 start; - char *sep; + u64 start, size; + char *sep, *endptr; ssize_t line_len; line_len = getline(&line, &n, file); @@ -598,7 +600,11 @@ int modules__parse(const char *filename, void *arg, scnprintf(name, sizeof(name), "[%s]", line); - err = process_module(arg, name, start); + size = strtoul(sep + 1, &endptr, 0); + if (*endptr != ' ' && *endptr != '\t') + continue; + + err = process_module(arg, name, start, size); if (err) break; } @@ -945,7 +951,8 @@ static struct module_info *find_module(const char *name, return NULL; } -static int __read_proc_modules(void *arg, const char *name, u64 start) +static int __read_proc_modules(void *arg, const char *name, u64 start, + u64 size __maybe_unused) { struct rb_root *modules = arg; struct module_info *mi; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f0b08810d7fa..b221671070e2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -273,7 +273,7 @@ int filename__read_build_id(const char *filename, void *bf, size_t size); int sysfs__read_build_id(const char *filename, void *bf, size_t size); int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, - u64 start)); + u64 start, u64 size)); int filename__read_debuglink(const char *filename, char *debuglink, size_t size); From 4a084ecfc8217bca1ab074feb4bc8a2c0f828960 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 3 Aug 2017 15:49:01 +0200 Subject: [PATCH 185/253] perf report: Fix module symbol adjustment for s390x The 'perf report' tool does not display the addresses of kernel module symbols correctly. For example symbol qeth_send_ipa_cmd in kernel module qeth.ko has this relative address for function qeth_send_ipa_cmd(): [root@s8360047 linux]# nm -g drivers/s390/net/qeth.ko | fgrep send_ipa_cmd 0000000000013088 T qeth_send_ipa_cmd The module is loaded at address: [root@s8360047 linux]# cat /sys/module/qeth/sections/.text 0x000003ff80296d20 [root@s8360047 linux]# This should result in a start address of: 0x13088 + 0x3ff80296d20 = 0x3ff802a9da8 Using crash to verify the address on a live system: [root@s8360046 linux]# crash vmlinux crash 7.1.9++ Copyright (C) 2002-2016 Red Hat, Inc. Copyright (C) 2004, 2005, 2006, 2010 IBM Corporation [...] crash> mod -s qeth drivers/s390/net/qeth.ko MODULE NAME SIZE OBJECT FILE 3ff8028d700 qeth 151552 drivers/s390/net/qeth.ko crash> sym qeth_send_ipa_cmd 3ff802a9da8 (T) qeth_send_ipa_cmd [qeth] /root/linux/drivers/s390/net/qeth_core_main.c: 2944 crash> Now perf report displays the address of symbol qeth_send_ipa_cmd: symbol__new: qeth_send_ipa_cmd 0x130f0-0x132ce There is a difference of 0x68 between the entry in the symbol table (see nm command above) and perf. The difference is from the offset the .text segment of qeth.ko: [root@s8360047 perf]# readelf -a drivers/s390/net/qeth.ko Section Headers: [Nr] Name Type Address Offset Size EntSize Flags Link Info Align [ 0] NULL 0000000000000000 00000000 0000000000000000 0000000000000000 0 0 0 [ 1] .note.gnu.build-i NOTE 0000000000000000 00000040 0000000000000024 0000000000000000 A 0 0 4 [ 2] .text PROGBITS 0000000000000000 00000068 000000000001c8a0 0000000000000000 AX 0 0 8 As seen the .text segment has an offset of 0x68 with start address 0x0. Therefore 0x68 is added to the address of qeth_send_ipa_cmd and thus 0x13088 + 0x68 = 0x130f0 is displayed. This is wrong, perf report needs to display the start address of symbol qeth_send_ipa_cmd at 0x13088 + qeth.ko.text section start address. The qeth.ko module .text start address is available in the qeth.ko DSO map. Just identify the kernel module symbols and correct the addresses. With the fix I see this correct address for symbol: symbol__new: qeth_send_ipa_cmd 0x3ff802a9da8-0x3ff802a9f86 Signed-off-by: Thomas-Mich Richter Reviewed-by: Arnaldo Carvalho de Melo Cc: Hendrik Brueckner Cc: Thomas-Mich Richter Cc: Zvonko Kosic LPU-Reference: 20170803134902.47207-1-tmricht@linux.vnet.ibm.com Link: http://lkml.kernel.org/n/tip-q8lktlpoxb5e3dj52u1s1rw4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/util/sym-handling.c | 7 +++++++ tools/perf/util/symbol-elf.c | 8 +++++++- tools/perf/util/symbol.h | 3 +++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/s390/util/sym-handling.c b/tools/perf/arch/s390/util/sym-handling.c index b6cd056ccf71..e103f6e46afe 100644 --- a/tools/perf/arch/s390/util/sym-handling.c +++ b/tools/perf/arch/s390/util/sym-handling.c @@ -19,4 +19,11 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN; } +void arch__adjust_sym_map_offset(GElf_Sym *sym, + GElf_Shdr *shdr __maybe_unused, + struct map *map) +{ + if (map->type == MAP__FUNCTION) + sym->st_value += map->start; +} #endif diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 98deabb9b47e..a70479061fce 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -793,6 +793,12 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr, + struct map *map __maybe_unused) +{ + sym->st_value -= shdr->sh_addr - shdr->sh_offset; +} + int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule) { @@ -973,7 +979,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, /* Adjust symbol to map to file offset */ if (adjust_kernel_syms) - sym.st_value -= shdr.sh_addr - shdr.sh_offset; + arch__adjust_sym_map_offset(&sym, &shdr, map); if (strcmp(section_name, (curr_dso->short_name + diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index b221671070e2..d00a012cfdfb 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -343,6 +343,9 @@ int setup_intlist(struct intlist **list, const char *list_str, #ifdef HAVE_LIBELF_SUPPORT bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); void arch__sym_update(struct symbol *s, GElf_Sym *sym); +void arch__adjust_sym_map_offset(GElf_Sym *sym, + GElf_Shdr *shdr __maybe_unused, + struct map *map __maybe_unused); #endif #define SYMBOL_A 0 From 8fc375d7d36c72b4c2d55f5c24be022a939295d4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 10 Aug 2017 14:50:49 -0300 Subject: [PATCH 186/253] perf test shell: Add uprobes + backtrace ping test Installs a probe on libc's inet_pton function, that will use uprobes, then use 'perf trace' on a ping to localhost asking for just one packet with the a backtrace 3 levels deep, check that it is what we expect. This needs no debuginfo package, all is done using the libc ELF symtab and the CFI info in the binaries. Testing it: # perf test ping 61: probe libc's inet_pton & backtrace it with ping : Ok In verbose mode: # perf test -v ping 61: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 1007 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.058 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.058/0.058/0.058/0.000 ms 0.000 probe_libc:inet_pton:(7f75fce12a20)) __GI___inet_pton (/usr/lib64/libc-2.24.so) getaddrinfo (/usr/lib64/libc-2.24.so) _init (/usr/bin/ping) test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-idrntt4nbg15aafu8hjmv7sk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../tests/shell/trace+probe_libc_inet_pton.sh | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100755 tools/perf/tests/shell/trace+probe_libc_inet_pton.sh diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh new file mode 100755 index 000000000000..1b9a276e2ace --- /dev/null +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -0,0 +1,40 @@ +# probe libc's inet_pton & backtrace it with ping + +# Installs a probe on libc's inet_pton function, that will use uprobes, +# then use 'perf trace' on a ping to localhost asking for just one packet +# with the a backtrace 3 levels deep, check that it is what we expect. +# This needs no debuginfo package, all is done using the libc ELF symtab +# and the CFI info in the binaries. + +# Arnaldo Carvalho de Melo , 2017 + +trace_libc_inet_pton_backtrace() { + idx=0 + expected[0]="PING.*bytes" + expected[1]="64 bytes from ::1.*" + expected[2]=".*ping statistics.*" + expected[3]=".*packets transmitted.*" + expected[4]="rtt min.*" + expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)" + expected[6]=".*inet_pton[[:space:]]\(/usr/lib.*/libc-[0-9]+\.[0-9]+\.so\)$" + expected[7]="getaddrinfo[[:space:]]\(/usr/lib.*/libc-[0-9]+\.[0-9]+\.so\)$" + expected[8]=".*\(.*/bin/ping.*\)$" + + perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 |& grep -v ^$ | while read line ; do + echo $line + echo "$line" | egrep -q "${expected[$idx]}" + if [ $? -ne 0 ] ; then + printf "FAIL: expected backtrace entry %d \"%s\" got \"%s\"\n" $idx "${expected[$idx]}" "$line" + exit 1 + fi + let idx+=1 + [ $idx -eq 9 ] && break + done +} + +perf probe -q /lib64/libc-*.so inet_pton && \ +trace_libc_inet_pton_backtrace +err=$? +rm -f ${file} +perf probe -q -d probe_libc:inet_pton +exit $err From 06786963020cc42c50f2c95d1ca5cb839460fa0e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Aug 2017 13:53:10 -0300 Subject: [PATCH 187/253] perf tests shell: Remove duplicate skip_if_no_debuginfo() function Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-3zxjswdbs2au3ih0rino0iy1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/probe_vfs_getname.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index 42308040f882..3eaddf190a30 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -21,8 +21,3 @@ skip_if_no_debuginfo() { add_probe_vfs_getname -v 2>&1 | grep -q "^Failed to find the path for kernel" && return 2 return 1 } - -skip_if_no_debuginfo() { - add_probe_vfs_getname -v 2>&1 | grep -q "^Failed to find the path for kernel" && return 2 - return 1 -} From 1ad5a182690c60b7effafec3c1c4512c11e5b545 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Aug 2017 14:26:09 -0300 Subject: [PATCH 188/253] perf test shell: Check if 'perf probe' is available, skip tests if not Add a library function that checks if 'perf probe' is built into the tool being tested, skipping tests that need it. Testing it on a system after removing the library needed to build 'probe' as a perf subcommand: # perf test ping vfs_getname 59: Use vfs_getname probe to get syscall args filenames : Skip 60: probe libc's inet_pton & backtrace it with ping : Skip 61: Check open filename arg using perf trace + vfs_getname: Skip 62: Add vfs_getname probe to get syscall args filenames : Skip # perf probe perf: 'probe' is not a perf-command. See 'perf --help'. # Now reinstalling elfutils-libelf-devel on this Fedora 26 system to rebuild perf and then retest this: # perf test ping vfs_getname 60: Use vfs_getname probe to get syscall args filenames : Ok 61: probe libc's inet_pton & backtrace it with ping : Ok 62: Check open filename arg using perf trace + vfs_getname: Ok 63: Add vfs_getname probe to get syscall args filenames : Ok # Reported-by: Kim Phillips Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ctdck2gzsskqhjzu3ebb62zm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/probe.sh | 6 ++++++ tools/perf/tests/shell/lib/probe_vfs_getname.sh | 2 +- tools/perf/tests/shell/probe_vfs_getname.sh | 4 ++++ tools/perf/tests/shell/record+script_probe_vfs_getname.sh | 4 ++++ tools/perf/tests/shell/trace+probe_libc_inet_pton.sh | 3 +++ tools/perf/tests/shell/trace+probe_vfs_getname.sh | 4 ++++ 6 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tools/perf/tests/shell/lib/probe.sh diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh new file mode 100644 index 000000000000..61da3b2c9bca --- /dev/null +++ b/tools/perf/tests/shell/lib/probe.sh @@ -0,0 +1,6 @@ +# Arnaldo Carvalho de Melo , 2017 + +skip_if_no_perf_probe() { + perf probe |& grep -q 'is not a perf-command' && return 2 + return 0 +} diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index 3eaddf190a30..46c1bb707600 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -1,6 +1,6 @@ # Arnaldo Carvalho de Melo , 2017 -perf probe -l | grep -q probe:vfs_getname +perf probe -l |& grep -q probe:vfs_getname had_vfs_getname=$? cleanup_probe_vfs_getname() { diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh index c8380137beef..9b7635184dc2 100755 --- a/tools/perf/tests/shell/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/probe_vfs_getname.sh @@ -2,6 +2,10 @@ # # Arnaldo Carvalho de Melo , 2017 +. $(dirname $0)/lib/probe.sh + +skip_if_no_perf_probe || exit 2 + . $(dirname $0)/lib/probe_vfs_getname.sh add_probe_vfs_getname || skip_if_no_debuginfo diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index 2725c5db699a..ba29535b8580 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -7,6 +7,10 @@ # Arnaldo Carvalho de Melo , 2017 +. $(dirname $0)/lib/probe.sh + +skip_if_no_perf_probe || exit 2 + . $(dirname $0)/lib/probe_vfs_getname.sh perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index 1b9a276e2ace..99dafad61954 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -8,6 +8,8 @@ # Arnaldo Carvalho de Melo , 2017 +. $(dirname $0)/lib/probe.sh + trace_libc_inet_pton_backtrace() { idx=0 expected[0]="PING.*bytes" @@ -32,6 +34,7 @@ trace_libc_inet_pton_backtrace() { done } +skip_if_no_perf_probe && \ perf probe -q /lib64/libc-*.so inet_pton && \ trace_libc_inet_pton_backtrace err=$? diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index d0fba0c5e354..2e68c5f120da 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -8,6 +8,10 @@ # Arnaldo Carvalho de Melo , 2017 +. $(dirname $0)/lib/probe.sh + +skip_if_no_perf_probe || exit 2 + . $(dirname $0)/lib/probe_vfs_getname.sh file=$(mktemp /tmp/temporary_file.XXXXX) From 2b728861a63a7ba6073a476244a83f575864273e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Aug 2017 14:40:58 -0300 Subject: [PATCH 189/253] perf test shell vfs_getname: Skip for tools built with NO_LIBDWARF=1 If that is the case, or if the required lib is not present, e.g. elfutils-devel in Fedora systems, then just skip the tests requiring DWARF analysis. Before: # rpm -e elfutils-devel # perf test ping vfs_getname 60: Use vfs_getname probe to get syscall args filenames : FAILED! 61: probe libc's inet_pton & backtrace it with ping : Ok 62: Check open filename arg using perf trace + vfs_getname: FAILED! 63: Add vfs_getname probe to get syscall args filenames : FAILED! # After: # perf test vfs_getname 60: Use vfs_getname probe to get syscall args filenames : Skip 62: Check open filename arg using perf trace + vfs_getname: Skip 63: Add vfs_getname probe to get syscall args filenames : Skip # Then, reinstalling elfutils-devel, rebuilding the tool and running again: # perf test vfs_getname 60: Use vfs_getname probe to get syscall args filenames : Ok 62: Check open filename arg using perf trace + vfs_getname: Ok 63: Add vfs_getname probe to get syscall args filenames : Ok # Reported-by: Kim Phillips Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-d67tvn401fxrwr97pu5ihfb1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/probe_vfs_getname.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index 46c1bb707600..f832395daad7 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -12,12 +12,12 @@ cleanup_probe_vfs_getname() { add_probe_vfs_getname() { local verbose=$1 if [ $had_vfs_getname -eq 1 ] ; then - line=$(perf probe -L getname_flags | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') - perf probe $verbose "vfs_getname=getname_flags:${line} pathname=result->name:string" + line=$(perf probe -L getname_flags |& egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') + perf probe $verbose "vfs_getname=getname_flags:${line} pathname=result->name:string" fi } skip_if_no_debuginfo() { - add_probe_vfs_getname -v 2>&1 | grep -q "^Failed to find the path for kernel" && return 2 + add_probe_vfs_getname -v 2>&1 | egrep -q "^(Failed to find the path for kernel|Debuginfo-analysis is not supported)" && return 2 return 1 } From c8a827285c33e7a19e81dfbff2740e1e67ca42df Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 3 Aug 2017 11:31:26 +0300 Subject: [PATCH 190/253] perf scripts python: Fix missing call_path_id in export-to-postgresql script The export does not work if only branches are exported because of a missing column in the samples table. Fix by adding the missing call_path_id. Fixes: 3521f3bc9dae ("perf script: Update export-to-postgresql to support callchain export") Signed-off-by: Adrian Hunter Link: http://lkml.kernel.org/r/1501749090-20357-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 7656ff8aa066..f57811443beb 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -340,7 +340,8 @@ if branches: 'to_sym_offset bigint,' 'to_ip bigint,' 'branch_type integer,' - 'in_tx boolean)') + 'in_tx boolean,' + 'call_path_id bigint)') else: do_query(query, 'CREATE TABLE samples (' 'id bigint NOT NULL,' From 2295e9f850b0efbc57c81fccdd8bd8d26fe10029 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 3 Aug 2017 11:31:27 +0300 Subject: [PATCH 191/253] perf scripts python: Fix query in call-graph-from-postgresql.py Add a missing space which seemed not to affect PostgreSQL but upsets SQLite. Signed-off-by: Adrian Hunter Link: http://lkml.kernel.org/r/1501749090-20357-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/call-graph-from-postgresql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/call-graph-from-postgresql.py b/tools/perf/scripts/python/call-graph-from-postgresql.py index e78fdc2a5a9d..ed9f7f3ccf22 100644 --- a/tools/perf/scripts/python/call-graph-from-postgresql.py +++ b/tools/perf/scripts/python/call-graph-from-postgresql.py @@ -160,7 +160,7 @@ class TreeItem(): '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), ' '( SELECT ip FROM call_paths where id = call_path_id ) ' 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) + - 'ORDER BY call_path_id') + ' ORDER BY call_path_id') if not ret: raise Exception("Query failed: " + query.lastError().text()) last_call_path_id = 0 From 564b9527d1ccf5d581275391e39ac4b1f29f0d08 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 3 Aug 2017 11:31:28 +0300 Subject: [PATCH 192/253] perf script python: Add support for exporting to sqlite3 Add support for exporting to SQLite 3 the same data as the PostgreSQL export. Committer note: Tested on RHEL 7.4 using the 1.2.2-4el python-pyside packages from EPEL. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1501749090-20357-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 6 +- .../python/bin/export-to-sqlite-record | 8 + .../python/bin/export-to-sqlite-report | 29 ++ tools/perf/scripts/python/export-to-sqlite.py | 451 ++++++++++++++++++ 4 files changed, 491 insertions(+), 3 deletions(-) create mode 100644 tools/perf/scripts/python/bin/export-to-sqlite-record create mode 100644 tools/perf/scripts/python/bin/export-to-sqlite-report create mode 100644 tools/perf/scripts/python/export-to-sqlite.py diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 4b6cdbf8f935..8e8ae3ad4cbf 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -104,9 +104,9 @@ system, asynchronous, interrupt, transaction abort, trace begin, trace end, and in transaction, respectively. While it is possible to create scripts to analyze the data, an alternative -approach is available to export the data to a postgresql database. Refer to -script export-to-postgresql.py for more details, and to script -call-graph-from-postgresql.py for an example of using the database. +approach is available to export the data to a sqlite or postgresql database. +Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, +and to script call-graph-from-postgresql.py for an example of using the database. There is also script intel-pt-events.py which provides an example of how to unpack the raw data for power events and PTWRITE. diff --git a/tools/perf/scripts/python/bin/export-to-sqlite-record b/tools/perf/scripts/python/bin/export-to-sqlite-record new file mode 100644 index 000000000000..070204fd6d00 --- /dev/null +++ b/tools/perf/scripts/python/bin/export-to-sqlite-record @@ -0,0 +1,8 @@ +#!/bin/bash + +# +# export perf data to a sqlite3 database. Can cover +# perf ip samples (excluding the tracepoints). No special +# record requirements, just record what you want to export. +# +perf record $@ diff --git a/tools/perf/scripts/python/bin/export-to-sqlite-report b/tools/perf/scripts/python/bin/export-to-sqlite-report new file mode 100644 index 000000000000..5ff6033e70ba --- /dev/null +++ b/tools/perf/scripts/python/bin/export-to-sqlite-report @@ -0,0 +1,29 @@ +#!/bin/bash +# description: export perf data to a sqlite3 database +# args: [database name] [columns] [calls] +n_args=0 +for i in "$@" +do + if expr match "$i" "-" > /dev/null ; then + break + fi + n_args=$(( $n_args + 1 )) +done +if [ "$n_args" -gt 3 ] ; then + echo "usage: export-to-sqlite-report [database name] [columns] [calls]" + exit +fi +if [ "$n_args" -gt 2 ] ; then + dbname=$1 + columns=$2 + calls=$3 + shift 3 +elif [ "$n_args" -gt 1 ] ; then + dbname=$1 + columns=$2 + shift 2 +elif [ "$n_args" -gt 0 ] ; then + dbname=$1 + shift +fi +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/export-to-sqlite.py $dbname $columns $calls diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py new file mode 100644 index 000000000000..f827bf77e9d2 --- /dev/null +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -0,0 +1,451 @@ +# export-to-sqlite.py: export perf data to a sqlite3 database +# Copyright (c) 2017, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. + +import os +import sys +import struct +import datetime + +# To use this script you will need to have installed package python-pyside which +# provides LGPL-licensed Python bindings for Qt. You will also need the package +# libqt4-sql-sqlite for Qt sqlite3 support. +# +# An example of using this script with Intel PT: +# +# $ perf record -e intel_pt//u ls +# $ perf script -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py pt_example branches calls +# 2017-07-31 14:26:07.326913 Creating database... +# 2017-07-31 14:26:07.538097 Writing records... +# 2017-07-31 14:26:09.889292 Adding indexes +# 2017-07-31 14:26:09.958746 Done +# +# To browse the database, sqlite3 can be used e.g. +# +# $ sqlite3 pt_example +# sqlite> .header on +# sqlite> select * from samples_view where id < 10; +# sqlite> .mode column +# sqlite> select * from samples_view where id < 10; +# sqlite> .tables +# sqlite> .schema samples_view +# sqlite> .quit +# +# An example of using the database is provided by the script +# call-graph-from-sql.py. Refer to that script for details. +# +# The database structure is practically the same as created by the script +# export-to-postgresql.py. Refer to that script for details. A notable +# difference is the 'transaction' column of the 'samples' table which is +# renamed 'transaction_' in sqlite because 'transaction' is a reserved word. + +from PySide.QtSql import * + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +# These perf imports are not used at present +#from perf_trace_context import * +#from Core import * + +perf_db_export_mode = True +perf_db_export_calls = False +perf_db_export_callchains = False + +def usage(): + print >> sys.stderr, "Usage is: export-to-sqlite.py [] [] []" + print >> sys.stderr, "where: columns 'all' or 'branches'" + print >> sys.stderr, " calls 'calls' => create calls and call_paths table" + print >> sys.stderr, " callchains 'callchains' => create call_paths table" + raise Exception("Too few arguments") + +if (len(sys.argv) < 2): + usage() + +dbname = sys.argv[1] + +if (len(sys.argv) >= 3): + columns = sys.argv[2] +else: + columns = "all" + +if columns not in ("all", "branches"): + usage() + +branches = (columns == "branches") + +for i in range(3,len(sys.argv)): + if (sys.argv[i] == "calls"): + perf_db_export_calls = True + elif (sys.argv[i] == "callchains"): + perf_db_export_callchains = True + else: + usage() + +def do_query(q, s): + if (q.exec_(s)): + return + raise Exception("Query failed: " + q.lastError().text()) + +def do_query_(q): + if (q.exec_()): + return + raise Exception("Query failed: " + q.lastError().text()) + +print datetime.datetime.today(), "Creating database..." + +db_exists = False +try: + f = open(dbname) + f.close() + db_exists = True +except: + pass + +if db_exists: + raise Exception(dbname + " already exists") + +db = QSqlDatabase.addDatabase('QSQLITE') +db.setDatabaseName(dbname) +db.open() + +query = QSqlQuery(db) + +do_query(query, 'PRAGMA journal_mode = OFF') +do_query(query, 'BEGIN TRANSACTION') + +do_query(query, 'CREATE TABLE selected_events (' + 'id integer NOT NULL PRIMARY KEY,' + 'name varchar(80))') +do_query(query, 'CREATE TABLE machines (' + 'id integer NOT NULL PRIMARY KEY,' + 'pid integer,' + 'root_dir varchar(4096))') +do_query(query, 'CREATE TABLE threads (' + 'id integer NOT NULL PRIMARY KEY,' + 'machine_id bigint,' + 'process_id bigint,' + 'pid integer,' + 'tid integer)') +do_query(query, 'CREATE TABLE comms (' + 'id integer NOT NULL PRIMARY KEY,' + 'comm varchar(16))') +do_query(query, 'CREATE TABLE comm_threads (' + 'id integer NOT NULL PRIMARY KEY,' + 'comm_id bigint,' + 'thread_id bigint)') +do_query(query, 'CREATE TABLE dsos (' + 'id integer NOT NULL PRIMARY KEY,' + 'machine_id bigint,' + 'short_name varchar(256),' + 'long_name varchar(4096),' + 'build_id varchar(64))') +do_query(query, 'CREATE TABLE symbols (' + 'id integer NOT NULL PRIMARY KEY,' + 'dso_id bigint,' + 'sym_start bigint,' + 'sym_end bigint,' + 'binding integer,' + 'name varchar(2048))') +do_query(query, 'CREATE TABLE branch_types (' + 'id integer NOT NULL PRIMARY KEY,' + 'name varchar(80))') + +if branches: + do_query(query, 'CREATE TABLE samples (' + 'id integer NOT NULL PRIMARY KEY,' + 'evsel_id bigint,' + 'machine_id bigint,' + 'thread_id bigint,' + 'comm_id bigint,' + 'dso_id bigint,' + 'symbol_id bigint,' + 'sym_offset bigint,' + 'ip bigint,' + 'time bigint,' + 'cpu integer,' + 'to_dso_id bigint,' + 'to_symbol_id bigint,' + 'to_sym_offset bigint,' + 'to_ip bigint,' + 'branch_type integer,' + 'in_tx boolean,' + 'call_path_id bigint)') +else: + do_query(query, 'CREATE TABLE samples (' + 'id integer NOT NULL PRIMARY KEY,' + 'evsel_id bigint,' + 'machine_id bigint,' + 'thread_id bigint,' + 'comm_id bigint,' + 'dso_id bigint,' + 'symbol_id bigint,' + 'sym_offset bigint,' + 'ip bigint,' + 'time bigint,' + 'cpu integer,' + 'to_dso_id bigint,' + 'to_symbol_id bigint,' + 'to_sym_offset bigint,' + 'to_ip bigint,' + 'period bigint,' + 'weight bigint,' + 'transaction_ bigint,' + 'data_src bigint,' + 'branch_type integer,' + 'in_tx boolean,' + 'call_path_id bigint)') + +if perf_db_export_calls or perf_db_export_callchains: + do_query(query, 'CREATE TABLE call_paths (' + 'id integer NOT NULL PRIMARY KEY,' + 'parent_id bigint,' + 'symbol_id bigint,' + 'ip bigint)') +if perf_db_export_calls: + do_query(query, 'CREATE TABLE calls (' + 'id integer NOT NULL PRIMARY KEY,' + 'thread_id bigint,' + 'comm_id bigint,' + 'call_path_id bigint,' + 'call_time bigint,' + 'return_time bigint,' + 'branch_count bigint,' + 'call_id bigint,' + 'return_id bigint,' + 'parent_call_path_id bigint,' + 'flags integer)') + +# printf was added to sqlite in version 3.8.3 +sqlite_has_printf = False +try: + do_query(query, 'SELECT printf("") FROM machines') + sqlite_has_printf = True +except: + pass + +def emit_to_hex(x): + if sqlite_has_printf: + return 'printf("%x", ' + x + ')' + else: + return x + +do_query(query, 'CREATE VIEW machines_view AS ' + 'SELECT ' + 'id,' + 'pid,' + 'root_dir,' + 'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest' + ' FROM machines') + +do_query(query, 'CREATE VIEW dsos_view AS ' + 'SELECT ' + 'id,' + 'machine_id,' + '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,' + 'short_name,' + 'long_name,' + 'build_id' + ' FROM dsos') + +do_query(query, 'CREATE VIEW symbols_view AS ' + 'SELECT ' + 'id,' + 'name,' + '(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,' + 'dso_id,' + 'sym_start,' + 'sym_end,' + 'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding' + ' FROM symbols') + +do_query(query, 'CREATE VIEW threads_view AS ' + 'SELECT ' + 'id,' + 'machine_id,' + '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,' + 'process_id,' + 'pid,' + 'tid' + ' FROM threads') + +do_query(query, 'CREATE VIEW comm_threads_view AS ' + 'SELECT ' + 'comm_id,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + 'thread_id,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid' + ' FROM comm_threads') + +if perf_db_export_calls or perf_db_export_callchains: + do_query(query, 'CREATE VIEW call_paths_view AS ' + 'SELECT ' + 'c.id,' + + emit_to_hex('c.ip') + ' AS ip,' + 'c.symbol_id,' + '(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,' + '(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,' + '(SELECT dso FROM symbols_view WHERE id = c.symbol_id) AS dso_short_name,' + 'c.parent_id,' + + emit_to_hex('p.ip') + ' AS parent_ip,' + 'p.symbol_id AS parent_symbol_id,' + '(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,' + '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,' + '(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name' + ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id') +if perf_db_export_calls: + do_query(query, 'CREATE VIEW calls_view AS ' + 'SELECT ' + 'calls.id,' + 'thread_id,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + 'call_path_id,' + + emit_to_hex('ip') + ' AS ip,' + 'symbol_id,' + '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,' + 'call_time,' + 'return_time,' + 'return_time - call_time AS elapsed_time,' + 'branch_count,' + 'call_id,' + 'return_id,' + 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'parent_call_path_id' + ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') + +do_query(query, 'CREATE VIEW samples_view AS ' + 'SELECT ' + 'id,' + 'time,' + 'cpu,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + '(SELECT name FROM selected_events WHERE id = evsel_id) AS event,' + + emit_to_hex('ip') + ' AS ip_hex,' + '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,' + 'sym_offset,' + '(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,' + + emit_to_hex('to_ip') + ' AS to_ip_hex,' + '(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,' + 'to_sym_offset,' + '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' + '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' + 'in_tx' + ' FROM samples') + +do_query(query, 'END TRANSACTION') + +evsel_query = QSqlQuery(db) +evsel_query.prepare("INSERT INTO selected_events VALUES (?, ?)") +machine_query = QSqlQuery(db) +machine_query.prepare("INSERT INTO machines VALUES (?, ?, ?)") +thread_query = QSqlQuery(db) +thread_query.prepare("INSERT INTO threads VALUES (?, ?, ?, ?, ?)") +comm_query = QSqlQuery(db) +comm_query.prepare("INSERT INTO comms VALUES (?, ?)") +comm_thread_query = QSqlQuery(db) +comm_thread_query.prepare("INSERT INTO comm_threads VALUES (?, ?, ?)") +dso_query = QSqlQuery(db) +dso_query.prepare("INSERT INTO dsos VALUES (?, ?, ?, ?, ?)") +symbol_query = QSqlQuery(db) +symbol_query.prepare("INSERT INTO symbols VALUES (?, ?, ?, ?, ?, ?)") +branch_type_query = QSqlQuery(db) +branch_type_query.prepare("INSERT INTO branch_types VALUES (?, ?)") +sample_query = QSqlQuery(db) +if branches: + sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") +else: + sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") +if perf_db_export_calls or perf_db_export_callchains: + call_path_query = QSqlQuery(db) + call_path_query.prepare("INSERT INTO call_paths VALUES (?, ?, ?, ?)") +if perf_db_export_calls: + call_query = QSqlQuery(db) + call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") + +def trace_begin(): + print datetime.datetime.today(), "Writing records..." + do_query(query, 'BEGIN TRANSACTION') + # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs + evsel_table(0, "unknown") + machine_table(0, 0, "unknown") + thread_table(0, 0, 0, -1, -1) + comm_table(0, "unknown") + dso_table(0, 0, "unknown", "unknown", "") + symbol_table(0, 0, 0, 0, 0, "unknown") + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + if perf_db_export_calls or perf_db_export_callchains: + call_path_table(0, 0, 0, 0) + +unhandled_count = 0 + +def trace_end(): + do_query(query, 'END TRANSACTION') + + print datetime.datetime.today(), "Adding indexes" + if perf_db_export_calls: + do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') + + if (unhandled_count): + print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events" + print datetime.datetime.today(), "Done" + +def trace_unhandled(event_name, context, event_fields_dict): + global unhandled_count + unhandled_count += 1 + +def sched__sched_switch(*x): + pass + +def bind_exec(q, n, x): + for xx in x[0:n]: + q.addBindValue(str(xx)) + do_query_(q) + +def evsel_table(*x): + bind_exec(evsel_query, 2, x) + +def machine_table(*x): + bind_exec(machine_query, 3, x) + +def thread_table(*x): + bind_exec(thread_query, 5, x) + +def comm_table(*x): + bind_exec(comm_query, 2, x) + +def comm_thread_table(*x): + bind_exec(comm_thread_query, 3, x) + +def dso_table(*x): + bind_exec(dso_query, 5, x) + +def symbol_table(*x): + bind_exec(symbol_query, 6, x) + +def branch_type_table(*x): + bind_exec(branch_type_query, 2, x) + +def sample_table(*x): + if branches: + bind_exec(sample_query, 18, x) + else: + bind_exec(sample_query, 22, x) + +def call_path_table(*x): + bind_exec(call_path_query, 4, x) + +def call_return_table(*x): + bind_exec(call_query, 11, x) From 69e6e410f1a1e69cb656e8ebddaae0ba2138b235 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 3 Aug 2017 11:31:29 +0300 Subject: [PATCH 193/253] perf script python: Rename call-graph-from-postgresql.py to call-graph-from-sql.py Rename call-graph-from-postgresql.py to call-graph-from-sql.py in preparation for adding support to it for SQLite 3. Signed-off-by: Adrian Hunter Link: http://lkml.kernel.org/r/1501749090-20357-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 2 +- ...graph-from-postgresql.py => call-graph-from-sql.py} | 10 +++++----- tools/perf/scripts/python/export-to-postgresql.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) rename tools/perf/scripts/python/{call-graph-from-postgresql.py => call-graph-from-sql.py} (96%) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 8e8ae3ad4cbf..ab1b0825130a 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -106,7 +106,7 @@ in transaction, respectively. While it is possible to create scripts to analyze the data, an alternative approach is available to export the data to a sqlite or postgresql database. Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, -and to script call-graph-from-postgresql.py for an example of using the database. +and to script call-graph-from-sql.py for an example of using the database. There is also script intel-pt-events.py which provides an example of how to unpack the raw data for power events and PTWRITE. diff --git a/tools/perf/scripts/python/call-graph-from-postgresql.py b/tools/perf/scripts/python/call-graph-from-sql.py similarity index 96% rename from tools/perf/scripts/python/call-graph-from-postgresql.py rename to tools/perf/scripts/python/call-graph-from-sql.py index ed9f7f3ccf22..f18406d3faf7 100644 --- a/tools/perf/scripts/python/call-graph-from-postgresql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -1,6 +1,6 @@ #!/usr/bin/python2 -# call-graph-from-postgresql.py: create call-graph from postgresql database -# Copyright (c) 2014, Intel Corporation. +# call-graph-from-sql.py: create call-graph from postgresql database +# Copyright (c) 2014-2017, Intel Corporation. # # This program is free software; you can redistribute it and/or modify it # under the terms and conditions of the GNU General Public License, @@ -17,12 +17,12 @@ # Following on from the example in the export-to-postgresql.py script, a # call-graph can be displayed for the pt_example database like this: # -# python tools/perf/scripts/python/call-graph-from-postgresql.py pt_example +# python tools/perf/scripts/python/call-graph-from-sql.py pt_example # # Note this script supports connecting to remote databases by setting hostname, # port, username, password, and dbname e.g. # -# python tools/perf/scripts/python/call-graph-from-postgresql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" +# python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" # # The result is a GUI window with a tree representing a context-sensitive # call-graph. Expanding a couple of levels of the tree and adjusting column @@ -291,7 +291,7 @@ class MainWindow(QMainWindow): if __name__ == '__main__': if (len(sys.argv) < 2): - print >> sys.stderr, "Usage is: call-graph-from-postgresql.py " + print >> sys.stderr, "Usage is: call-graph-from-sql.py " raise Exception("Too few arguments") dbname = sys.argv[1] diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index f57811443beb..efcaf6cac2eb 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -59,7 +59,7 @@ import datetime # pt_example=# \q # # An example of using the database is provided by the script -# call-graph-from-postgresql.py. Refer to that script for details. +# call-graph-from-sql.py. Refer to that script for details. # # Tables: # From 1fe03b5f2d70b48c4a10785edf2678ff05506e59 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 3 Aug 2017 11:31:30 +0300 Subject: [PATCH 194/253] perf script python: Add support for sqlite3 to call-graph-from-sql.py Add support for SQLite 3 to the call-graph-from-sql.py script. The SQL statements work as is, so just detect the database type by checking if the SQLite 3 file exists. Committer notes: Tested collecting the PT data on a RHEL7.4, generating the SQLite3 database there and then moving it to a Fedora 26 system where the call-graph-from-sql.py script was run, using python-pyside version 1.2.2-7fc26 to see the callgraphs using Qt4. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1501749090-20357-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripts/python/call-graph-from-sql.py | 58 +++++++++++-------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py index f18406d3faf7..b494a67a1c67 100644 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ b/tools/perf/scripts/python/call-graph-from-sql.py @@ -1,5 +1,5 @@ #!/usr/bin/python2 -# call-graph-from-sql.py: create call-graph from postgresql database +# call-graph-from-sql.py: create call-graph from sql database # Copyright (c) 2014-2017, Intel Corporation. # # This program is free software; you can redistribute it and/or modify it @@ -11,16 +11,17 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. -# To use this script you will need to have exported data using the -# export-to-postgresql.py script. Refer to that script for details. +# To use this script you will need to have exported data using either the +# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those +# scripts for details. # -# Following on from the example in the export-to-postgresql.py script, a +# Following on from the example in the export scripts, a # call-graph can be displayed for the pt_example database like this: # # python tools/perf/scripts/python/call-graph-from-sql.py pt_example # -# Note this script supports connecting to remote databases by setting hostname, -# port, username, password, and dbname e.g. +# Note that for PostgreSQL, this script supports connecting to remote databases +# by setting hostname, port, username, password, and dbname e.g. # # python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" # @@ -296,24 +297,35 @@ if __name__ == '__main__': dbname = sys.argv[1] - db = QSqlDatabase.addDatabase('QPSQL') + is_sqlite3 = False + try: + f = open(dbname) + if f.read(15) == "SQLite format 3": + is_sqlite3 = True + f.close() + except: + pass - opts = dbname.split() - for opt in opts: - if '=' in opt: - opt = opt.split('=') - if opt[0] == 'hostname': - db.setHostName(opt[1]) - elif opt[0] == 'port': - db.setPort(int(opt[1])) - elif opt[0] == 'username': - db.setUserName(opt[1]) - elif opt[0] == 'password': - db.setPassword(opt[1]) - elif opt[0] == 'dbname': - dbname = opt[1] - else: - dbname = opt + if is_sqlite3: + db = QSqlDatabase.addDatabase('QSQLITE') + else: + db = QSqlDatabase.addDatabase('QPSQL') + opts = dbname.split() + for opt in opts: + if '=' in opt: + opt = opt.split('=') + if opt[0] == 'hostname': + db.setHostName(opt[1]) + elif opt[0] == 'port': + db.setPort(int(opt[1])) + elif opt[0] == 'username': + db.setUserName(opt[1]) + elif opt[0] == 'password': + db.setPassword(opt[1]) + elif opt[0] == 'dbname': + dbname = opt[1] + else: + dbname = opt db.setDatabaseName(dbname) if not db.open(): From db26984a363e8b8e35783c402978e8acdf9041a5 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 15 Aug 2017 11:21:59 +0200 Subject: [PATCH 195/253] perf bpf: Fix endianness problem when loading parameters in prologue Perf's BPF prologue generator unconditionally fetches 8 bytes for function parameters, which causes problems on big endian machines. Thomas gives a detailed analysis for this problem: http://lkml.kernel.org/r/968ebda5-abe4-8830-8d69-49f62529d151@linux.vnet.ibm.com ---- 8< ---- I investigated perf test BPF for s390x and have a question regarding the 38.3 subtest (bpf-prologue test) which fails on s390x. When I turn on trace_printk in tests/bpf-script-test-prologue.c I see this output in /sys/kernel/debug/tracing/trace: [root@s8360047 perf]# cat /sys/kernel/debug/tracing/trace perf-30229 [000] d..2 170161.535791: : f_mode 2001d00000000 offset:0 orig:0 perf-30229 [000] d..2 170161.535809: : f_mode 6001f00000000 offset:0 orig:0 perf-30229 [000] d..2 170161.535815: : f_mode 6001f00000000 offset:1 orig:0 perf-30229 [000] d..2 170161.535819: : f_mode 2001d00000000 offset:1 orig:0 perf-30229 [000] d..2 170161.535822: : f_mode 2001d00000000 offset:2 orig:1 perf-30229 [000] d..2 170161.535825: : f_mode 6001f00000000 offset:2 orig:1 perf-30229 [000] d..2 170161.535828: : f_mode 6001f00000000 offset:3 orig:1 perf-30229 [000] d..2 170161.535832: : f_mode 2001d00000000 offset:3 orig:1 perf-30229 [000] d..2 170161.535835: : f_mode 2001d00000000 offset:4 orig:0 perf-30229 [000] d..2 170161.535841: : f_mode 6001f00000000 offset:4 orig:0 [...] There are 3 parameters the eBPF program tests/bpf-script-test-prologue.c accesses: f_mode (member of struct file at offset 140) offset and orig. They are parameters of the lseek() system call triggered in this test case in function llseek_loop(). What is really strange is the value of f_mode. It is an 8 byte value, whereas in the probe event it is defined as a 4 byte value. The lower 4 bytes are all zero and do not belong to member f_mode. The correct value should be 2001d for read-only and 6001f for read-write open mode. Here is the output of the 'perf test -vv bpf' trace: Try to find probe point from debuginfo. Matched function: null_lseek [2d9310d] Probe point found: null_lseek+0 Searching 'file' variable in context. Converting variable file into trace event. converting f_mode in file f_mode type is unsigned int. Opening /sys/kernel/debug/tracing//README write=0 Searching 'offset' variable in context. Converting variable offset into trace event. offset type is long long int. Searching 'orig' variable in context. Converting variable orig into trace event. orig type is int. Found 1 probe_trace_events. Opening /sys/kernel/debug/tracing//kprobe_events write=1 Writing event: p:perf_bpf_probe/func _text+8794224 f_mode=+140(%r2):x32 ---- 8< ---- This patch parses the type of each argument and converts data from memory to expected type. Now the test runs successfully on 4.13.0-rc5: [root@s8360046 perf]# ./perf test bpf 38: BPF filter : 38.1: Basic BPF filtering : Ok 38.2: BPF pinning : Ok 38.3: BPF prologue generation : Ok 38.4: BPF relocation checker : Ok [root@s8360046 perf]# Signed-off-by: Wang Nan Cc: Hendrik Brueckner Link: http://lkml.kernel.org/r/20170815092159.31912-1-tmricht@linux.vnet.ibm.com Signed-off-by: Thomas-Mich Richter Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf-script-test-prologue.c | 4 +- tools/perf/util/bpf-prologue.c | 49 ++++++++++++++++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c index b4ebc75e25ae..43f1e16486f4 100644 --- a/tools/perf/tests/bpf-script-test-prologue.c +++ b/tools/perf/tests/bpf-script-test-prologue.c @@ -26,9 +26,11 @@ static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = (void *) 6; SEC("func=null_lseek file->f_mode offset orig") -int bpf_func__null_lseek(void *ctx, int err, unsigned long f_mode, +int bpf_func__null_lseek(void *ctx, int err, unsigned long _f_mode, unsigned long offset, unsigned long orig) { + fmode_t f_mode = (fmode_t)_f_mode; + if (err) return 0; if (f_mode & FMODE_WRITE) diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c index 1356220a9f1b..827f9140f3b8 100644 --- a/tools/perf/util/bpf-prologue.c +++ b/tools/perf/util/bpf-prologue.c @@ -58,6 +58,46 @@ check_pos(struct bpf_insn_pos *pos) return 0; } +/* + * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see + * Documentation/trace/kprobetrace.txt) to size field of BPF_LDX_MEM + * instruction (BPF_{B,H,W,DW}). + */ +static int +argtype_to_ldx_size(const char *type) +{ + int arg_size = type ? atoi(&type[1]) : 64; + + switch (arg_size) { + case 8: + return BPF_B; + case 16: + return BPF_H; + case 32: + return BPF_W; + case 64: + default: + return BPF_DW; + } +} + +static const char * +insn_sz_to_str(int insn_sz) +{ + switch (insn_sz) { + case BPF_B: + return "BPF_B"; + case BPF_H: + return "BPF_H"; + case BPF_W: + return "BPF_W"; + case BPF_DW: + return "BPF_DW"; + default: + return "UNKNOWN"; + } +} + /* Give it a shorter name */ #define ins(i, p) append_insn((i), (p)) @@ -258,9 +298,14 @@ gen_prologue_slowpath(struct bpf_insn_pos *pos, } /* Final pass: read to registers */ - for (i = 0; i < nargs; i++) - ins(BPF_LDX_MEM(BPF_DW, BPF_PROLOGUE_START_ARG_REG + i, + for (i = 0; i < nargs; i++) { + int insn_sz = (args[i].ref) ? argtype_to_ldx_size(args[i].type) : BPF_DW; + + pr_debug("prologue: load arg %d, insn_sz is %s\n", + i, insn_sz_to_str(insn_sz)); + ins(BPF_LDX_MEM(insn_sz, BPF_PROLOGUE_START_ARG_REG + i, BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos); + } ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos); From 35435cd06081d7db96bc617b65ba556f8e24340e Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 16 Aug 2017 16:20:40 -0300 Subject: [PATCH 196/253] perf test shell: Replace '|&' with '2>&1 |' to work with more shells Since we do not specify bash (and/or zsh) as a requirement, use the standard error redirection that is more widely supported. Signed-off-by: Kim Phillips Link: http://lkml.kernel.org/n/tip-ji5mhn3iilgch3eaay6csr6z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/probe.sh | 2 +- tools/perf/tests/shell/lib/probe_vfs_getname.sh | 4 ++-- tools/perf/tests/shell/trace+probe_libc_inet_pton.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh index 61da3b2c9bca..6293cc660947 100644 --- a/tools/perf/tests/shell/lib/probe.sh +++ b/tools/perf/tests/shell/lib/probe.sh @@ -1,6 +1,6 @@ # Arnaldo Carvalho de Melo , 2017 skip_if_no_perf_probe() { - perf probe |& grep -q 'is not a perf-command' && return 2 + perf probe 2>&1 | grep -q 'is not a perf-command' && return 2 return 0 } diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index f832395daad7..30a950c9d407 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -1,6 +1,6 @@ # Arnaldo Carvalho de Melo , 2017 -perf probe -l |& grep -q probe:vfs_getname +perf probe -l 2>&1 | grep -q probe:vfs_getname had_vfs_getname=$? cleanup_probe_vfs_getname() { @@ -12,7 +12,7 @@ cleanup_probe_vfs_getname() { add_probe_vfs_getname() { local verbose=$1 if [ $had_vfs_getname -eq 1 ] ; then - line=$(perf probe -L getname_flags |& egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') + line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') perf probe $verbose "vfs_getname=getname_flags:${line} pathname=result->name:string" fi } diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index 99dafad61954..462fc755092e 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -22,7 +22,7 @@ trace_libc_inet_pton_backtrace() { expected[7]="getaddrinfo[[:space:]]\(/usr/lib.*/libc-[0-9]+\.[0-9]+\.so\)$" expected[8]=".*\(.*/bin/ping.*\)$" - perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 |& grep -v ^$ | while read line ; do + perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do echo $line echo "$line" | egrep -q "${expected[$idx]}" if [ $? -ne 0 ] ; then From f15d5e6dba97eb36eb638d62fad697937a9602e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20D=C3=ADaz?= Date: Tue, 15 Aug 2017 11:33:30 -0500 Subject: [PATCH 197/253] tools lib bpf: Fix double file test in Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Makefile verifies the same file exists twice: test -f ../../../include/uapi/linux/bpf.h -a \ -f ../../../include/uapi/linux/bpf.h The purpose of the check is to ensure the diff (immediately after the test) doesn't fail with these two files: tools/include/uapi/linux/bpf.h include/uapi/linux/bpf.h Same recipe for bpf_common: test -f ../../../include/uapi/linux/bpf_common.h -a \ -f ../../../include/uapi/linux/bpf_common.h This corrects the location of the tests. Signed-off-by: Daniel Díaz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1502814810-960-1-git-send-email-daniel.diaz@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index e87b5903f4bb..4ed0257dc1f3 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -154,10 +154,10 @@ all: fixdep $(VERSION_FILES) all_cmd all_cmd: $(CMD_TARGETS) $(BPF_IN): force elfdep bpfdep - @(test -f ../../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ + @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true - @(test -f ../../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \ + @(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \ (diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf From c73881eeb1a0ee9b45d8bc9f45ab02e23d81c5c5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Aug 2017 15:02:00 -0700 Subject: [PATCH 198/253] perf jevents: Support FCMask and PortMask Skylake server uncore IIO events need new FCMask/PortMask fields. Support those in the json parser and pass it through as a filter. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170816220201.19182-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 2350f6099a46..d51dc9ca8861 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -137,6 +137,8 @@ static struct field { { "AnyThread", "any=" }, { "EdgeDetect", "edge=" }, { "SampleAfterValue", "period=" }, + { "FCMask", "fc_mask=" }, + { "PortMask", "ch_mask=" }, { NULL, NULL } }; From d6d4fc6fefda26a88eb99d30e87a8834c59c144a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Aug 2017 15:12:55 -0300 Subject: [PATCH 199/253] perf trace: Fix off by one string allocation problem We need to consider the null terminator, oops, fix it. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 017037ff3d0b ("perf trace: Allow specifying list of syscalls and events in -e/--expr/--event") Link: http://lkml.kernel.org/n/tip-j79jpqqe91gvxqmsgxgfn2ni@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index de02413a25d3..91905839e386 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2806,7 +2806,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str, struct trace *trace = (struct trace *)opt->value; const char *s = str; char *sep = NULL, *lists[2] = { NULL, NULL, }; - int len = strlen(str), err = -1, list; + int len = strlen(str) + 1, err = -1, list; char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); char group_name[PATH_MAX]; From 07806a1df1ccbdd2fa5ba56d01bb77d11c828331 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Aug 2017 15:57:30 -0300 Subject: [PATCH 200/253] perf events parse: Remove some needless local variables Those are just casting a void pointer to a struct to then pass them to functions, i.e. remove the local variables and pass the void pointer directly, the casting will be done and the code will be shorter. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-bzfodzr3mb46gy7u7v0mqad6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 04fd8c9af9f9..96c0fa59aa8f 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -225,14 +225,13 @@ event_def: event_pmu | event_pmu: PE_NAME opt_event_config { - struct parse_events_evlist *data = _data; struct list_head *list, *orig_terms, *terms; if (parse_events_copy_term_list($2, &orig_terms)) YYABORT; ALLOC_LIST(list); - if (parse_events_add_pmu(data, list, $1, $2)) { + if (parse_events_add_pmu(_data, list, $1, $2)) { struct perf_pmu *pmu = NULL; int ok = 0; @@ -245,7 +244,7 @@ PE_NAME opt_event_config if (!strncmp($1, name, strlen($1))) { if (parse_events_copy_term_list(orig_terms, &terms)) YYABORT; - if (!parse_events_add_pmu(data, list, pmu->name, terms)) + if (!parse_events_add_pmu(_data, list, pmu->name, terms)) ok++; parse_events_terms__delete(terms); } @@ -286,26 +285,24 @@ PE_VALUE_SYM_SW event_legacy_symbol: value_sym '/' event_config '/' { - struct parse_events_evlist *data = _data; struct list_head *list; int type = $1 >> 16; int config = $1 & 255; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(data, list, type, config, $3)); + ABORT_ON(parse_events_add_numeric(_data, list, type, config, $3)); parse_events_terms__delete($3); $$ = list; } | value_sym sep_slash_dc { - struct parse_events_evlist *data = _data; struct list_head *list; int type = $1 >> 16; int config = $1 & 255; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(data, list, type, config, NULL)); + ABORT_ON(parse_events_add_numeric(_data, list, type, config, NULL)); $$ = list; } @@ -432,11 +429,10 @@ PE_NAME ':' PE_NAME event_legacy_numeric: PE_VALUE ':' PE_VALUE opt_event_config { - struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, $4)); + ABORT_ON(parse_events_add_numeric(_data, list, (u32)$1, $3, $4)); parse_events_terms__delete($4); $$ = list; } @@ -444,11 +440,10 @@ PE_VALUE ':' PE_VALUE opt_event_config event_legacy_raw: PE_RAW opt_event_config { - struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, $2)); + ABORT_ON(parse_events_add_numeric(_data, list, PERF_TYPE_RAW, $1, $2)); parse_events_terms__delete($2); $$ = list; } @@ -468,11 +463,10 @@ PE_BPF_OBJECT opt_event_config | PE_BPF_SOURCE opt_event_config { - struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_load_bpf(data, list, $1, true, $2)); + ABORT_ON(parse_events_load_bpf(_data, list, $1, true, $2)); parse_events_terms__delete($2); $$ = list; } From 5d369a75eda5855d64981668a1d60cfac00d52e9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Aug 2017 14:13:25 -0300 Subject: [PATCH 201/253] perf events parse: Rename parsing state struct to clearer name Rename it from 'parse_events_evlist' to 'parse_events_state' to better state that this is parsing state that has to be passed around. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-dursqtg2h2w98ztaa297u43x@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 2 +- tools/perf/util/parse-events.c | 24 ++++++++++++------------ tools/perf/util/parse-events.h | 14 +++++++------- tools/perf/util/parse-events.y | 22 +++++++++++----------- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 4422ab636d30..45ad248e6ccd 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -124,7 +124,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), struct perf_evlist *evlist; int i, ret = TEST_FAIL, err = 0, count = 0; - struct parse_events_evlist parse_evlist; + struct parse_events_state parse_evlist; struct parse_events_error parse_error; bzero(&parse_error, sizeof(parse_error)); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 84e301073885..272eab7f5ac8 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -589,7 +589,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, } struct __add_bpf_event_param { - struct parse_events_evlist *data; + struct parse_events_state *data; struct list_head *list; struct list_head *head_config; }; @@ -599,7 +599,7 @@ static int add_bpf_event(const char *group, const char *event, int fd, { LIST_HEAD(new_evsels); struct __add_bpf_event_param *param = _param; - struct parse_events_evlist *evlist = param->data; + struct parse_events_state *parse_state = param->data; struct list_head *list = param->list; struct perf_evsel *pos; int err; @@ -607,8 +607,8 @@ static int add_bpf_event(const char *group, const char *event, int fd, pr_debug("add bpf event %s:%s and attach bpf program %d\n", group, event, fd); - err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, group, - event, evlist->error, + err = parse_events_add_tracepoint(&new_evsels, &parse_state->idx, group, + event, parse_state->error, param->head_config); if (err) { struct perf_evsel *evsel, *tmp; @@ -632,7 +632,7 @@ static int add_bpf_event(const char *group, const char *event, int fd, return 0; } -int parse_events_load_bpf_obj(struct parse_events_evlist *data, +int parse_events_load_bpf_obj(struct parse_events_state *data, struct list_head *list, struct bpf_object *obj, struct list_head *head_config) @@ -686,7 +686,7 @@ int parse_events_load_bpf_obj(struct parse_events_evlist *data, } static int -parse_events_config_bpf(struct parse_events_evlist *data, +parse_events_config_bpf(struct parse_events_state *data, struct bpf_object *obj, struct list_head *head_config) { @@ -762,7 +762,7 @@ split_bpf_config_terms(struct list_head *evt_head_config, list_move_tail(&term->list, obj_head_config); } -int parse_events_load_bpf(struct parse_events_evlist *data, +int parse_events_load_bpf(struct parse_events_state *data, struct list_head *list, char *bpf_file_name, bool source, @@ -1184,7 +1184,7 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, err, head_config); } -int parse_events_add_numeric(struct parse_events_evlist *data, +int parse_events_add_numeric(struct parse_events_state *data, struct list_head *list, u32 type, u64 config, struct list_head *head_config) @@ -1209,7 +1209,7 @@ int parse_events_add_numeric(struct parse_events_evlist *data, get_config_name(head_config), &config_terms); } -int parse_events_add_pmu(struct parse_events_evlist *data, +int parse_events_add_pmu(struct parse_events_state *data, struct list_head *list, char *name, struct list_head *head_config) { @@ -1267,7 +1267,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, return evsel ? 0 : -ENOMEM; } -int parse_events_multi_pmu_add(struct parse_events_evlist *data, +int parse_events_multi_pmu_add(struct parse_events_state *data, char *str, struct list_head **listp) { struct list_head *head; @@ -1675,7 +1675,7 @@ int parse_events_terms(struct list_head *terms, const char *str) int parse_events(struct perf_evlist *evlist, const char *str, struct parse_events_error *err) { - struct parse_events_evlist data = { + struct parse_events_state data = { .list = LIST_HEAD_INIT(data.list), .idx = evlist->nr_entries, .error = err, @@ -2520,7 +2520,7 @@ void parse_events__clear_array(struct parse_events_array *a) zfree(&a->ranges); } -void parse_events_evlist_error(struct parse_events_evlist *data, +void parse_events_evlist_error(struct parse_events_state *data, int idx, const char *str) { struct parse_events_error *err = data->error; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index a235f4d6d5e5..8fff8423469b 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -108,7 +108,7 @@ struct parse_events_error { char *help; /* optional help string */ }; -struct parse_events_evlist { +struct parse_events_state { struct list_head list; int idx; int nr_groups; @@ -143,18 +143,18 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, const char *sys, const char *event, struct parse_events_error *error, struct list_head *head_config); -int parse_events_load_bpf(struct parse_events_evlist *data, +int parse_events_load_bpf(struct parse_events_state *data, struct list_head *list, char *bpf_file_name, bool source, struct list_head *head_config); /* Provide this function for perf test */ struct bpf_object; -int parse_events_load_bpf_obj(struct parse_events_evlist *data, +int parse_events_load_bpf_obj(struct parse_events_state *data, struct list_head *list, struct bpf_object *obj, struct list_head *head_config); -int parse_events_add_numeric(struct parse_events_evlist *data, +int parse_events_add_numeric(struct parse_events_state *data, struct list_head *list, u32 type, u64 config, struct list_head *head_config); @@ -164,11 +164,11 @@ int parse_events_add_cache(struct list_head *list, int *idx, struct list_head *head_config); int parse_events_add_breakpoint(struct list_head *list, int *idx, void *ptr, char *type, u64 len); -int parse_events_add_pmu(struct parse_events_evlist *data, +int parse_events_add_pmu(struct parse_events_state *data, struct list_head *list, char *name, struct list_head *head_config); -int parse_events_multi_pmu_add(struct parse_events_evlist *data, +int parse_events_multi_pmu_add(struct parse_events_state *data, char *str, struct list_head **listp); @@ -180,7 +180,7 @@ perf_pmu__parse_check(const char *name); void parse_events__set_leader(char *name, struct list_head *list); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); -void parse_events_evlist_error(struct parse_events_evlist *data, +void parse_events_evlist_error(struct parse_events_state *data, int idx, const char *str); void print_events(const char *event_glob, bool name_only, bool quiet, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 96c0fa59aa8f..403a221266b8 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -33,7 +33,7 @@ do { \ } while (0) static void inc_group_count(struct list_head *list, - struct parse_events_evlist *data) + struct parse_events_state *data) { /* Count groups only have more than 1 members */ if (!list_is_last(list->next, list)) @@ -115,7 +115,7 @@ PE_START_TERMS start_terms start_events: groups { - struct parse_events_evlist *data = _data; + struct parse_events_state *data = _data; parse_events_update_lists($1, &data->list); } @@ -309,7 +309,7 @@ value_sym sep_slash_dc event_legacy_cache: PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config { - struct parse_events_evlist *data = _data; + struct parse_events_state *data = _data; struct parse_events_error *error = data->error; struct list_head *list; @@ -321,7 +321,7 @@ PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_e | PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config { - struct parse_events_evlist *data = _data; + struct parse_events_state *data = _data; struct parse_events_error *error = data->error; struct list_head *list; @@ -333,7 +333,7 @@ PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config | PE_NAME_CACHE_TYPE opt_event_config { - struct parse_events_evlist *data = _data; + struct parse_events_state *data = _data; struct parse_events_error *error = data->error; struct list_head *list; @@ -346,7 +346,7 @@ PE_NAME_CACHE_TYPE opt_event_config event_legacy_mem: PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc { - struct parse_events_evlist *data = _data; + struct parse_events_state *data = _data; struct list_head *list; ALLOC_LIST(list); @@ -357,7 +357,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc | PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc { - struct parse_events_evlist *data = _data; + struct parse_events_state *data = _data; struct list_head *list; ALLOC_LIST(list); @@ -368,7 +368,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc | PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc { - struct parse_events_evlist *data = _data; + struct parse_events_state *data = _data; struct list_head *list; ALLOC_LIST(list); @@ -379,7 +379,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc | PE_PREFIX_MEM PE_VALUE sep_dc { - struct parse_events_evlist *data = _data; + struct parse_events_state *data = _data; struct list_head *list; ALLOC_LIST(list); @@ -391,7 +391,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc event_legacy_tracepoint: tracepoint_name opt_event_config { - struct parse_events_evlist *data = _data; + struct parse_events_state *data = _data; struct parse_events_error *error = data->error; struct list_head *list; @@ -451,7 +451,7 @@ PE_RAW opt_event_config event_bpf_file: PE_BPF_OBJECT opt_event_config { - struct parse_events_evlist *data = _data; + struct parse_events_state *data = _data; struct parse_events_error *error = data->error; struct list_head *list; From d17d0878f456c8227345b6c76b918ec068fa0abd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Aug 2017 14:22:50 -0300 Subject: [PATCH 202/253] perf events parse: Use just one parse events state struct Andi reported problems when parse errors were detected with vendor events (json), because in the yyparse/parse_events_parse function we dereferenced the _data parameter to two different structs, with different layouts, which ended up making parse_events_evlist->error to point to random stack addresses. Fix it by making _data to always be struct parse_events_state, changing the only place where 'struct parse_events_term' was used in parse_events.y. Reported-by: Andi Kleen Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-bc27lshz823hxl8n9nkelcgh@git.kernel.org Fixes: 90e2b22dee90 ("perf/tool: Add support to reuse event grammar to parse out terms") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 2 +- tools/perf/util/parse-events.h | 5 +---- tools/perf/util/parse-events.y | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 272eab7f5ac8..d4fcf048aa9e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1656,7 +1656,7 @@ static int parse_events__scanner(const char *str, void *data, int start_token) */ int parse_events_terms(struct list_head *terms, const char *str) { - struct parse_events_terms data = { + struct parse_events_state data = { .terms = NULL, }; int ret; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 8fff8423469b..2a3617937894 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -114,10 +114,7 @@ struct parse_events_state { int nr_groups; struct parse_events_error *error; struct perf_evlist *evlist; -}; - -struct parse_events_terms { - struct list_head *terms; + struct list_head *terms; }; void parse_events__shrink_config_terms(void); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 403a221266b8..4b37546e4914 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -488,7 +488,7 @@ opt_event_config: start_terms: event_config { - struct parse_events_terms *data = _data; + struct parse_events_state *data = _data; data->terms = $1; } From 5d9cdc1181c34f959e9fb8e24624223172071871 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Aug 2017 16:13:34 -0300 Subject: [PATCH 203/253] perf events parse: Rename parse_events_parse arguments Calling them just "data" is too vague, call it 'perf_state', to make it clearer, for instance, when looking at patch hunks. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-rnhk5yb05wem77rjpclrh7so@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 16 +++--- tools/perf/util/parse-events.c | 92 +++++++++++++++++----------------- tools/perf/util/parse-events.h | 12 ++--- tools/perf/util/parse-events.y | 88 ++++++++++++++++---------------- 4 files changed, 104 insertions(+), 104 deletions(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 45ad248e6ccd..34c22cdf4d5d 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -124,16 +124,16 @@ static int do_test(struct bpf_object *obj, int (*func)(void), struct perf_evlist *evlist; int i, ret = TEST_FAIL, err = 0, count = 0; - struct parse_events_state parse_evlist; + struct parse_events_state parse_state; struct parse_events_error parse_error; bzero(&parse_error, sizeof(parse_error)); - bzero(&parse_evlist, sizeof(parse_evlist)); - parse_evlist.error = &parse_error; - INIT_LIST_HEAD(&parse_evlist.list); + bzero(&parse_state, sizeof(parse_state)); + parse_state.error = &parse_error; + INIT_LIST_HEAD(&parse_state.list); - err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj, NULL); - if (err || list_empty(&parse_evlist.list)) { + err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL); + if (err || list_empty(&parse_state.list)) { pr_debug("Failed to add events selected by BPF\n"); return TEST_FAIL; } @@ -155,8 +155,8 @@ static int do_test(struct bpf_object *obj, int (*func)(void), goto out_delete_evlist; } - perf_evlist__splice_list_tail(evlist, &parse_evlist.list); - evlist->nr_groups = parse_evlist.nr_groups; + perf_evlist__splice_list_tail(evlist, &parse_state.list); + evlist->nr_groups = parse_state.nr_groups; perf_evlist__config(evlist, &opts, NULL); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d4fcf048aa9e..f44aeba51d1f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -34,7 +34,7 @@ #ifdef PARSER_DEBUG extern int parse_events_debug; #endif -int parse_events_parse(void *data, void *scanner); +int parse_events_parse(void *parse_state, void *scanner); static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused); @@ -589,7 +589,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, } struct __add_bpf_event_param { - struct parse_events_state *data; + struct parse_events_state *parse_state; struct list_head *list; struct list_head *head_config; }; @@ -599,7 +599,7 @@ static int add_bpf_event(const char *group, const char *event, int fd, { LIST_HEAD(new_evsels); struct __add_bpf_event_param *param = _param; - struct parse_events_state *parse_state = param->data; + struct parse_events_state *parse_state = param->parse_state; struct list_head *list = param->list; struct perf_evsel *pos; int err; @@ -632,14 +632,14 @@ static int add_bpf_event(const char *group, const char *event, int fd, return 0; } -int parse_events_load_bpf_obj(struct parse_events_state *data, +int parse_events_load_bpf_obj(struct parse_events_state *parse_state, struct list_head *list, struct bpf_object *obj, struct list_head *head_config) { int err; char errbuf[BUFSIZ]; - struct __add_bpf_event_param param = {data, list, head_config}; + struct __add_bpf_event_param param = {parse_state, list, head_config}; static bool registered_unprobe_atexit = false; if (IS_ERR(obj) || !obj) { @@ -680,13 +680,13 @@ int parse_events_load_bpf_obj(struct parse_events_state *data, return 0; errout: - data->error->help = strdup("(add -v to see detail)"); - data->error->str = strdup(errbuf); + parse_state->error->help = strdup("(add -v to see detail)"); + parse_state->error->str = strdup(errbuf); return err; } static int -parse_events_config_bpf(struct parse_events_state *data, +parse_events_config_bpf(struct parse_events_state *parse_state, struct bpf_object *obj, struct list_head *head_config) { @@ -705,28 +705,28 @@ parse_events_config_bpf(struct parse_events_state *data, "Invalid config term for BPF object"); errbuf[BUFSIZ - 1] = '\0'; - data->error->idx = term->err_term; - data->error->str = strdup(errbuf); + parse_state->error->idx = term->err_term; + parse_state->error->str = strdup(errbuf); return -EINVAL; } - err = bpf__config_obj(obj, term, data->evlist, &error_pos); + err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos); if (err) { - bpf__strerror_config_obj(obj, term, data->evlist, + bpf__strerror_config_obj(obj, term, parse_state->evlist, &error_pos, err, errbuf, sizeof(errbuf)); - data->error->help = strdup( + parse_state->error->help = strdup( "Hint:\tValid config terms:\n" " \tmap:[].value=[value]\n" " \tmap:[].event=[event]\n" "\n" " \twhere is something like [0,3...5] or [all]\n" " \t(add -v to see detail)"); - data->error->str = strdup(errbuf); + parse_state->error->str = strdup(errbuf); if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) - data->error->idx = term->err_val; + parse_state->error->idx = term->err_val; else - data->error->idx = term->err_term + error_pos; + parse_state->error->idx = term->err_term + error_pos; return err; } } @@ -762,7 +762,7 @@ split_bpf_config_terms(struct list_head *evt_head_config, list_move_tail(&term->list, obj_head_config); } -int parse_events_load_bpf(struct parse_events_state *data, +int parse_events_load_bpf(struct parse_events_state *parse_state, struct list_head *list, char *bpf_file_name, bool source, @@ -790,15 +790,15 @@ int parse_events_load_bpf(struct parse_events_state *data, -err, errbuf, sizeof(errbuf)); - data->error->help = strdup("(add -v to see detail)"); - data->error->str = strdup(errbuf); + parse_state->error->help = strdup("(add -v to see detail)"); + parse_state->error->str = strdup(errbuf); return err; } - err = parse_events_load_bpf_obj(data, list, obj, head_config); + err = parse_events_load_bpf_obj(parse_state, list, obj, head_config); if (err) return err; - err = parse_events_config_bpf(data, obj, &obj_head_config); + err = parse_events_config_bpf(parse_state, obj, &obj_head_config); /* * Caller doesn't know anything about obj_head_config, @@ -1184,7 +1184,7 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, err, head_config); } -int parse_events_add_numeric(struct parse_events_state *data, +int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, struct list_head *head_config) @@ -1197,7 +1197,7 @@ int parse_events_add_numeric(struct parse_events_state *data, attr.config = config; if (head_config) { - if (config_attr(&attr, head_config, data->error, + if (config_attr(&attr, head_config, parse_state->error, config_term_common)) return -EINVAL; @@ -1205,11 +1205,11 @@ int parse_events_add_numeric(struct parse_events_state *data, return -ENOMEM; } - return add_event(list, &data->idx, &attr, + return add_event(list, &parse_state->idx, &attr, get_config_name(head_config), &config_terms); } -int parse_events_add_pmu(struct parse_events_state *data, +int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config) { @@ -1232,7 +1232,7 @@ int parse_events_add_pmu(struct parse_events_state *data, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus, NULL); + evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL); return evsel ? 0 : -ENOMEM; } @@ -1243,16 +1243,16 @@ int parse_events_add_pmu(struct parse_events_state *data, * Configure hardcoded terms first, no need to check * return value when called with fail == 0 ;) */ - if (config_attr(&attr, head_config, data->error, config_term_pmu)) + if (config_attr(&attr, head_config, parse_state->error, config_term_pmu)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) return -ENOMEM; - if (perf_pmu__config(pmu, &attr, head_config, data->error)) + if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) return -EINVAL; - evsel = __add_event(list, &data->idx, &attr, + evsel = __add_event(list, &parse_state->idx, &attr, get_config_name(head_config), pmu->cpus, &config_terms); if (evsel) { @@ -1267,7 +1267,7 @@ int parse_events_add_pmu(struct parse_events_state *data, return evsel ? 0 : -ENOMEM; } -int parse_events_multi_pmu_add(struct parse_events_state *data, +int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, struct list_head **listp) { struct list_head *head; @@ -1296,7 +1296,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *data, return -1; list_add_tail(&term->list, head); - if (!parse_events_add_pmu(data, list, + if (!parse_events_add_pmu(parse_state, list, pmu->name, head)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); @@ -1628,7 +1628,7 @@ perf_pmu__parse_check(const char *name) return r ? r->type : PMU_EVENT_SYMBOL_ERR; } -static int parse_events__scanner(const char *str, void *data, int start_token) +static int parse_events__scanner(const char *str, void *parse_state, int start_token) { YY_BUFFER_STATE buffer; void *scanner; @@ -1643,7 +1643,7 @@ static int parse_events__scanner(const char *str, void *data, int start_token) #ifdef PARSER_DEBUG parse_events_debug = 1; #endif - ret = parse_events_parse(data, scanner); + ret = parse_events_parse(parse_state, scanner); parse_events__flush_buffer(buffer, scanner); parse_events__delete_buffer(buffer, scanner); @@ -1656,45 +1656,45 @@ static int parse_events__scanner(const char *str, void *data, int start_token) */ int parse_events_terms(struct list_head *terms, const char *str) { - struct parse_events_state data = { + struct parse_events_state parse_state = { .terms = NULL, }; int ret; - ret = parse_events__scanner(str, &data, PE_START_TERMS); + ret = parse_events__scanner(str, &parse_state, PE_START_TERMS); if (!ret) { - list_splice(data.terms, terms); - zfree(&data.terms); + list_splice(parse_state.terms, terms); + zfree(&parse_state.terms); return 0; } - parse_events_terms__delete(data.terms); + parse_events_terms__delete(parse_state.terms); return ret; } int parse_events(struct perf_evlist *evlist, const char *str, struct parse_events_error *err) { - struct parse_events_state data = { - .list = LIST_HEAD_INIT(data.list), + struct parse_events_state parse_state = { + .list = LIST_HEAD_INIT(parse_state.list), .idx = evlist->nr_entries, .error = err, .evlist = evlist, }; int ret; - ret = parse_events__scanner(str, &data, PE_START_EVENTS); + ret = parse_events__scanner(str, &parse_state, PE_START_EVENTS); perf_pmu__parse_cleanup(); if (!ret) { struct perf_evsel *last; - if (list_empty(&data.list)) { + if (list_empty(&parse_state.list)) { WARN_ONCE(true, "WARNING: event parser found nothing"); return -1; } - perf_evlist__splice_list_tail(evlist, &data.list); - evlist->nr_groups += data.nr_groups; + perf_evlist__splice_list_tail(evlist, &parse_state.list); + evlist->nr_groups += parse_state.nr_groups; last = perf_evlist__last(evlist); last->cmdline_group_boundary = true; @@ -2520,10 +2520,10 @@ void parse_events__clear_array(struct parse_events_array *a) zfree(&a->ranges); } -void parse_events_evlist_error(struct parse_events_state *data, +void parse_events_evlist_error(struct parse_events_state *parse_state, int idx, const char *str) { - struct parse_events_error *err = data->error; + struct parse_events_error *err = parse_state->error; if (!err) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 2a3617937894..635135125111 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -140,18 +140,18 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, const char *sys, const char *event, struct parse_events_error *error, struct list_head *head_config); -int parse_events_load_bpf(struct parse_events_state *data, +int parse_events_load_bpf(struct parse_events_state *parse_state, struct list_head *list, char *bpf_file_name, bool source, struct list_head *head_config); /* Provide this function for perf test */ struct bpf_object; -int parse_events_load_bpf_obj(struct parse_events_state *data, +int parse_events_load_bpf_obj(struct parse_events_state *parse_state, struct list_head *list, struct bpf_object *obj, struct list_head *head_config); -int parse_events_add_numeric(struct parse_events_state *data, +int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, struct list_head *head_config); @@ -161,11 +161,11 @@ int parse_events_add_cache(struct list_head *list, int *idx, struct list_head *head_config); int parse_events_add_breakpoint(struct list_head *list, int *idx, void *ptr, char *type, u64 len); -int parse_events_add_pmu(struct parse_events_state *data, +int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config); -int parse_events_multi_pmu_add(struct parse_events_state *data, +int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, struct list_head **listp); @@ -177,7 +177,7 @@ perf_pmu__parse_check(const char *name); void parse_events__set_leader(char *name, struct list_head *list); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); -void parse_events_evlist_error(struct parse_events_state *data, +void parse_events_evlist_error(struct parse_events_state *parse_state, int idx, const char *str); void print_events(const char *event_glob, bool name_only, bool quiet, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 4b37546e4914..e81a20ea8d7d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -1,5 +1,5 @@ %pure-parser -%parse-param {void *_data} +%parse-param {void *_parse_state} %parse-param {void *scanner} %lex-param {void* scanner} %locations @@ -17,7 +17,7 @@ #include "parse-events.h" #include "parse-events-bison.h" -void parse_events_error(YYLTYPE *loc, void *data, void *scanner, char const *msg); +void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner, char const *msg); #define ABORT_ON(val) \ do { \ @@ -33,11 +33,11 @@ do { \ } while (0) static void inc_group_count(struct list_head *list, - struct parse_events_state *data) + struct parse_events_state *parse_state) { /* Count groups only have more than 1 members */ if (!list_is_last(list->next, list)) - data->nr_groups++; + parse_state->nr_groups++; } %} @@ -115,9 +115,9 @@ PE_START_TERMS start_terms start_events: groups { - struct parse_events_state *data = _data; + struct parse_events_state *parse_state = _parse_state; - parse_events_update_lists($1, &data->list); + parse_events_update_lists($1, &parse_state->list); } groups: @@ -159,7 +159,7 @@ PE_NAME '{' events '}' { struct list_head *list = $3; - inc_group_count(list, _data); + inc_group_count(list, _parse_state); parse_events__set_leader($1, list); $$ = list; } @@ -168,7 +168,7 @@ PE_NAME '{' events '}' { struct list_head *list = $2; - inc_group_count(list, _data); + inc_group_count(list, _parse_state); parse_events__set_leader(NULL, list); $$ = list; } @@ -231,7 +231,7 @@ PE_NAME opt_event_config YYABORT; ALLOC_LIST(list); - if (parse_events_add_pmu(_data, list, $1, $2)) { + if (parse_events_add_pmu(_parse_state, list, $1, $2)) { struct perf_pmu *pmu = NULL; int ok = 0; @@ -244,7 +244,7 @@ PE_NAME opt_event_config if (!strncmp($1, name, strlen($1))) { if (parse_events_copy_term_list(orig_terms, &terms)) YYABORT; - if (!parse_events_add_pmu(_data, list, pmu->name, terms)) + if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms)) ok++; parse_events_terms__delete(terms); } @@ -261,7 +261,7 @@ PE_KERNEL_PMU_EVENT sep_dc { struct list_head *list; - if (parse_events_multi_pmu_add(_data, $1, &list) < 0) + if (parse_events_multi_pmu_add(_parse_state, $1, &list) < 0) YYABORT; $$ = list; } @@ -272,7 +272,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc char pmu_name[128]; snprintf(&pmu_name, 128, "%s-%s", $1, $3); - if (parse_events_multi_pmu_add(_data, pmu_name, &list) < 0) + if (parse_events_multi_pmu_add(_parse_state, pmu_name, &list) < 0) YYABORT; $$ = list; } @@ -290,7 +290,7 @@ value_sym '/' event_config '/' int config = $1 & 255; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(_data, list, type, config, $3)); + ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, $3)); parse_events_terms__delete($3); $$ = list; } @@ -302,43 +302,43 @@ value_sym sep_slash_dc int config = $1 & 255; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(_data, list, type, config, NULL)); + ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, NULL)); $$ = list; } event_legacy_cache: PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config { - struct parse_events_state *data = _data; - struct parse_events_error *error = data->error; + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5, error, $6)); + ABORT_ON(parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6)); parse_events_terms__delete($6); $$ = list; } | PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config { - struct parse_events_state *data = _data; - struct parse_events_error *error = data->error; + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL, error, $4)); + ABORT_ON(parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4)); parse_events_terms__delete($4); $$ = list; } | PE_NAME_CACHE_TYPE opt_event_config { - struct parse_events_state *data = _data; - struct parse_events_error *error = data->error; + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL, error, $2)); + ABORT_ON(parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2)); parse_events_terms__delete($2); $$ = list; } @@ -346,44 +346,44 @@ PE_NAME_CACHE_TYPE opt_event_config event_legacy_mem: PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc { - struct parse_events_state *data = _data; + struct parse_events_state *parse_state = _parse_state; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_breakpoint(list, &data->idx, + ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx, (void *) $2, $6, $4)); $$ = list; } | PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc { - struct parse_events_state *data = _data; + struct parse_events_state *parse_state = _parse_state; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_breakpoint(list, &data->idx, + ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx, (void *) $2, NULL, $4)); $$ = list; } | PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc { - struct parse_events_state *data = _data; + struct parse_events_state *parse_state = _parse_state; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_breakpoint(list, &data->idx, + ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx, (void *) $2, $4, 0)); $$ = list; } | PE_PREFIX_MEM PE_VALUE sep_dc { - struct parse_events_state *data = _data; + struct parse_events_state *parse_state = _parse_state; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_breakpoint(list, &data->idx, + ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx, (void *) $2, NULL, 0)); $$ = list; } @@ -391,15 +391,15 @@ PE_PREFIX_MEM PE_VALUE sep_dc event_legacy_tracepoint: tracepoint_name opt_event_config { - struct parse_events_state *data = _data; - struct parse_events_error *error = data->error; + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; struct list_head *list; ALLOC_LIST(list); if (error) error->idx = @1.first_column; - if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, + if (parse_events_add_tracepoint(list, &parse_state->idx, $1.sys, $1.event, error, $2)) return -1; @@ -432,7 +432,7 @@ PE_VALUE ':' PE_VALUE opt_event_config struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(_data, list, (u32)$1, $3, $4)); + ABORT_ON(parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4)); parse_events_terms__delete($4); $$ = list; } @@ -443,7 +443,7 @@ PE_RAW opt_event_config struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(_data, list, PERF_TYPE_RAW, $1, $2)); + ABORT_ON(parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, $1, $2)); parse_events_terms__delete($2); $$ = list; } @@ -451,12 +451,12 @@ PE_RAW opt_event_config event_bpf_file: PE_BPF_OBJECT opt_event_config { - struct parse_events_state *data = _data; - struct parse_events_error *error = data->error; + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_load_bpf(data, list, $1, false, $2)); + ABORT_ON(parse_events_load_bpf(parse_state, list, $1, false, $2)); parse_events_terms__delete($2); $$ = list; } @@ -466,7 +466,7 @@ PE_BPF_SOURCE opt_event_config struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_load_bpf(_data, list, $1, true, $2)); + ABORT_ON(parse_events_load_bpf(_parse_state, list, $1, true, $2)); parse_events_terms__delete($2); $$ = list; } @@ -488,8 +488,8 @@ opt_event_config: start_terms: event_config { - struct parse_events_state *data = _data; - data->terms = $1; + struct parse_events_state *parse_state = _parse_state; + parse_state->terms = $1; } event_config: @@ -679,9 +679,9 @@ sep_slash_dc: '/' | ':' | %% -void parse_events_error(YYLTYPE *loc, void *data, +void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner __maybe_unused, char const *msg __maybe_unused) { - parse_events_evlist_error(data, loc->last_column, "parser error"); + parse_events_evlist_error(parse_state, loc->last_column, "parser error"); } From 4717e03cc7e826818c3f9bd60e9c304a3309c3ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Aug 2017 16:44:36 -0300 Subject: [PATCH 204/253] perf tools: Remove unused cpu_relax() macros Since 195564390210 ("perf_counter: kerneltop: simplify data_head read") we do not use it, and this was way back in 2009, remove it before some other arch maintainer adds its implementation, like so many did, needlessly :-) Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-3l2su9c58eaq4twjzrf9uu08@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-sys.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index e4b717e9eb6c..2aaa7366cdc2 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -10,12 +10,10 @@ #include #if defined(__i386__) -#define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC {"model name"} #endif #if defined(__x86_64__) -#define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC {"model name"} #endif @@ -44,7 +42,6 @@ #endif #ifdef __ia64__ -#define cpu_relax() asm volatile ("hint @pause" ::: "memory") #define CPUINFO_PROC {"model name"} #endif @@ -53,7 +50,6 @@ #endif #ifdef __aarch64__ -#define cpu_relax() asm volatile("yield" ::: "memory") #endif #ifdef __mips__ @@ -73,14 +69,9 @@ #endif #ifdef __tile__ -#define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory") #define CPUINFO_PROC {"model name"} #endif -#ifndef cpu_relax -#define cpu_relax() barrier() -#endif - static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, From 9a57eaf1d2443d34cce0562f425228c37a8ec019 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Aug 2017 16:58:21 -0300 Subject: [PATCH 205/253] perf tools: Use default CPUINFO_PROC where it fits Several architectures don't need to define it since the string is the same as the default one, so nuke them. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-v1e1jr1u474w9xcelpaoxamu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-sys.h | 19 ++----------------- tools/perf/util/header.c | 3 --- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 2aaa7366cdc2..c11f0c76e90c 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -9,14 +9,6 @@ #include #include -#if defined(__i386__) -#define CPUINFO_PROC {"model name"} -#endif - -#if defined(__x86_64__) -#define CPUINFO_PROC {"model name"} -#endif - #ifdef __powerpc__ #define CPUINFO_PROC {"cpu"} #endif @@ -41,17 +33,10 @@ #define CPUINFO_PROC {"cpu model"} #endif -#ifdef __ia64__ -#define CPUINFO_PROC {"model name"} -#endif - #ifdef __arm__ #define CPUINFO_PROC {"model name", "Processor"} #endif -#ifdef __aarch64__ -#endif - #ifdef __mips__ #define CPUINFO_PROC {"cpu model"} #endif @@ -68,8 +53,8 @@ #define CPUINFO_PROC {"core ID"} #endif -#ifdef __tile__ -#define CPUINFO_PROC {"model name"} +#ifndef CPUINFO_PROC +#define CPUINFO_PROC { "model name", } #endif static inline int diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 28bf4442d577..605bbd5404fb 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -380,9 +380,6 @@ static int __write_cpudesc(struct feat_fd *ff, const char *cpuinfo_proc) static int write_cpudesc(struct feat_fd *ff, struct perf_evlist *evlist __maybe_unused) { -#ifndef CPUINFO_PROC -#define CPUINFO_PROC {"model name", } -#endif const char *cpuinfo_procs[] = CPUINFO_PROC; unsigned int i; From 1ac39372e06f5009982aaaf890fc5bbd044bb047 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 18 Aug 2017 17:46:48 +0900 Subject: [PATCH 206/253] perf annotate stdio: Support --show-nr-samples option Add --show-nr-samples option to "perf annotate" so that it matches "perf report". Committer note: Note that it can't be used together with --show-total-period, which seems like a silly limitation, that can be lifted at some point. Made it bail out if not on --stdio. Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1503046008-5511-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-annotate.txt | 4 ++++ tools/perf/builtin-annotate.c | 16 ++++++++++++++-- tools/perf/util/annotate.c | 6 +++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index a89273d8e744..2a5975c91cbd 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -43,6 +43,10 @@ OPTIONS --quiet:: Do not show any message. (Suppress -v) +-n:: +--show-nr-samples:: + Show the number of samples for each symbol + -D:: --dump-raw-trace:: Dump raw trace in ASCII. diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 658c920d74b9..89fc0389dc76 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -403,7 +403,7 @@ int cmd_annotate(int argc, const char **argv) struct perf_data_file file = { .mode = PERF_DATA_MODE_READ, }; - const struct option options[] = { + struct option options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", @@ -445,13 +445,20 @@ int cmd_annotate(int argc, const char **argv) "Show event group information together"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), + OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, + "Show a column with the number of samples"), OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode", "'always' (default), 'never' or 'auto' only applicable to --stdio mode", stdio__config_color, "always"), OPT_END() }; - int ret = hists__init(); + int ret; + set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE); + set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE); + + + ret = hists__init(); if (ret < 0) return ret; @@ -467,6 +474,11 @@ int cmd_annotate(int argc, const char **argv) annotate.sym_hist_filter = argv[0]; } + if (symbol_conf.show_nr_samples && !annotate.use_stdio) { + pr_err("--show-nr-samples is only available in --stdio mode at this time\n"); + return ret; + } + if (quiet) perf_quiet_option(); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 2dab0e5a7f2f..4397a8b6e6cd 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1145,6 +1145,9 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (symbol_conf.show_total_period) color_fprintf(stdout, color, " %11" PRIu64, sample.period); + else if (symbol_conf.show_nr_samples) + color_fprintf(stdout, color, " %7" PRIu64, + sample.nr_samples); else color_fprintf(stdout, color, " %7.2f", percent); } @@ -1825,7 +1828,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, width *= evsel->nr_members; graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n", - width, width, symbol_conf.show_total_period ? "Event count" : "Percent", + width, width, symbol_conf.show_total_period ? "Period" : + symbol_conf.show_nr_samples ? "Samples" : "Percent", d_filename, evsel_name, h->nr_samples); printf("%-*.*s----\n", From 01c85629f5e99958606da816f1df058c0722a570 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 18 Aug 2017 17:46:53 +0900 Subject: [PATCH 207/253] perf annotate: Document --show-total-period option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the --show-total-period option was introduced we forgot to add an entry in the man page, fix it. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Cc: Martin Liška Fixes: 0c4a5bcea460 ("perf annotate: Display total number of samples with --show-total-period") Link: http://lkml.kernel.org/r/1503046013-5555-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-annotate.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 2a5975c91cbd..c635eab6af54 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -92,6 +92,8 @@ OPTIONS --asm-raw:: Show raw instruction encoding of assembly instructions. +--show-total-period:: Show a column with the sum of periods. + --source:: Interleave source code with assembly code. Enabled by default, disable with --no-source. From 9cef4b0b5b7f64016f043609313aaa821d682d2e Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 18 Aug 2017 17:47:03 +0900 Subject: [PATCH 208/253] perf annotate browser: Support --show-nr-samples option Support the --show-nr-samples in the TUI browser. Committer notes: Lift the restriction about --tui but leave it for --gtk: $ export LD_LIBRARY_PATH=~/lib64 $ perf annotate --gtk --show-nr-samples --show-nr-samples is not available in --gtk mode at this time $ Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1503046023-5646-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 4 ++-- tools/perf/ui/browsers/annotate.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 89fc0389dc76..c38373195c4a 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -474,8 +474,8 @@ int cmd_annotate(int argc, const char **argv) annotate.sym_hist_filter = argv[0]; } - if (symbol_conf.show_nr_samples && !annotate.use_stdio) { - pr_err("--show-nr-samples is only available in --stdio mode at this time\n"); + if (symbol_conf.show_nr_samples && annotate.use_gtk) { + pr_err("--show-nr-samples is not available in --gtk mode at this time\n"); return ret; } diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 80f38dab9c3a..faca1b94496e 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -42,6 +42,7 @@ static struct annotate_browser_opt { jump_arrows, show_linenr, show_nr_jumps, + show_nr_samples, show_total_period; } annotate_browser__opts = { .use_offset = true, @@ -155,6 +156,9 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (annotate_browser__opts.show_total_period) { ui_browser__printf(browser, "%11" PRIu64 " ", bdl->samples[i].he.period); + } else if (annotate_browser__opts.show_nr_samples) { + ui_browser__printf(browser, "%6" PRIu64 " ", + bdl->samples[i].he.nr_samples); } else { ui_browser__printf(browser, "%6.2f ", bdl->samples[i].percent); @@ -167,7 +171,8 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__write_nstring(browser, " ", pcnt_width); else { ui_browser__printf(browser, "%*s", pcnt_width, - annotate_browser__opts.show_total_period ? "Period" : "Percent"); + annotate_browser__opts.show_total_period ? "Period" : + annotate_browser__opts.show_nr_samples ? "Samples" : "Percent"); } } if (ab->have_cycles) { @@ -931,9 +936,11 @@ static int annotate_browser__run(struct annotate_browser *browser, int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - /* Set default value for show_total_period. */ + /* Set default value for show_total_period and show_nr_samples */ annotate_browser__opts.show_total_period = - symbol_conf.show_total_period; + symbol_conf.show_total_period; + annotate_browser__opts.show_nr_samples = + symbol_conf.show_nr_samples; return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt); } @@ -1184,6 +1191,7 @@ static struct annotate_config { ANNOTATE_CFG(jump_arrows), ANNOTATE_CFG(show_linenr), ANNOTATE_CFG(show_nr_jumps), + ANNOTATE_CFG(show_nr_samples), ANNOTATE_CFG(show_total_period), ANNOTATE_CFG(use_offset), }; From 3a555c7799de69d73826eccc9a21948a5775d4d3 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 18 Aug 2017 17:47:08 +0900 Subject: [PATCH 209/253] perf annotate browser: Circulate percent, total-period and nr-samples view Using the existing 't' hotkey, support the three views: percent, total period and number of samples on the annotate TUI browser, circulating them like below: Percent -> Total Period -> Nr Samples -> Percent ... Committer notes: Removed new 'e' hotkey, should be resubmitted as a separate patch, with proper justification for its inclusion. Suggested-by: Namhyung Kim Signed-off-by: Taeung Song Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Milian Wolff Link: http://lkml.kernel.org/r/1503046028-5691-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index faca1b94496e..ba0aee576a2b 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -835,7 +835,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "n Search next string\n" "o Toggle disassembler output/simplified view\n" "s Toggle source code view\n" - "t Toggle total period view\n" + "t Circulate percent, total period, samples view\n" "/ Search string\n" "k Toggle line numbers\n" "r Run available scripts\n" @@ -912,8 +912,13 @@ static int annotate_browser__run(struct annotate_browser *browser, } continue; case 't': - annotate_browser__opts.show_total_period = - !annotate_browser__opts.show_total_period; + if (annotate_browser__opts.show_total_period) { + annotate_browser__opts.show_total_period = false; + annotate_browser__opts.show_nr_samples = true; + } else if (annotate_browser__opts.show_nr_samples) + annotate_browser__opts.show_nr_samples = false; + else + annotate_browser__opts.show_total_period = true; annotate_browser__update_addr_width(browser); continue; case K_LEFT: From d74be47673676eded2f0bb8274e752bed32c42d8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 11 Aug 2017 16:26:16 -0700 Subject: [PATCH 210/253] perf xyarray: Save max_x, max_y Save the original array dimensions in xyarrays, so that users can retrieve them later. Add some inline functions to access these fields. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170811232634.30465-1-andi@firstfloor.org [ As noticed by Jiri, fix up namespacing: xy__method() -> xyarray__method() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/xyarray.c | 2 ++ tools/perf/util/xyarray.h | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c index 7251fdbabced..c8f415d9877b 100644 --- a/tools/perf/util/xyarray.c +++ b/tools/perf/util/xyarray.c @@ -12,6 +12,8 @@ struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size) xy->entry_size = entry_size; xy->row_size = row_size; xy->entries = xlen * ylen; + xy->max_x = xlen; + xy->max_y = ylen; } return xy; diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h index 7f30af371b7e..4ba726c90870 100644 --- a/tools/perf/util/xyarray.h +++ b/tools/perf/util/xyarray.h @@ -7,6 +7,8 @@ struct xyarray { size_t row_size; size_t entry_size; size_t entries; + size_t max_x; + size_t max_y; char contents[]; }; @@ -19,4 +21,14 @@ static inline void *xyarray__entry(struct xyarray *xy, int x, int y) return &xy->contents[x * xy->row_size + y * xy->entry_size]; } +static inline int xyarray__max_y(struct xyarray *xy) +{ + return xy->max_x; +} + +static inline int xyarray__max_x(struct xyarray *xy) +{ + return xy->max_y; +} + #endif /* _PERF_XYARRAY_H_ */ From 475fb533fb7d3dcf009a434f9b9ea238b93f4cb8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 11 Aug 2017 16:26:17 -0700 Subject: [PATCH 211/253] perf evsel: Fix buffer overflow while freeing events Fix buffer overflow for: % perf stat -e msr/tsc/,cstate_core/c7-residency/ true that causes glibc free list corruption. For some reason it doesn't trigger in valgrind, but it is visible in AS: ================================================================= ==32681==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x603000003f5c at pc 0x0000005671ef bp 0x7ffdaaac9ac0 sp 0x7ffdaaac9ab0 READ of size 4 at 0x603000003f5c thread T0 #0 0x5671ee in perf_evsel__close_fd util/evsel.c:1196 #1 0x56c57a in perf_evsel__close util/evsel.c:1717 #2 0x55ed5f in perf_evlist__close util/evlist.c:1631 #3 0x4647e1 in __run_perf_stat /home/ak/hle/linux-hle-2.6/tools/perf/builtin-stat.c:749 #4 0x4648e3 in run_perf_stat /home/ak/hle/linux-hle-2.6/tools/perf/builtin-stat.c:767 #5 0x46e1bc in cmd_stat /home/ak/hle/linux-hle-2.6/tools/perf/builtin-stat.c:2785 #6 0x52f83d in run_builtin /home/ak/hle/linux-hle-2.6/tools/perf/perf.c:296 #7 0x52fd49 in handle_internal_command /home/ak/hle/linux-hle-2.6/tools/perf/perf.c:348 #8 0x5300de in run_argv /home/ak/hle/linux-hle-2.6/tools/perf/perf.c:392 #9 0x5308f3 in main /home/ak/hle/linux-hle-2.6/tools/perf/perf.c:530 #10 0x7f0672d13400 in __libc_start_main (/lib64/libc.so.6+0x20400) #11 0x428419 in _start (/home/ak/hle/obj-perf/perf+0x428419) 0x603000003f5c is located 0 bytes to the right of 28-byte region [0x603000003f40,0x603000003f5c) allocated by thread T0 here: #0 0x7f0675139020 in calloc (/lib64/libasan.so.3+0xc7020) #1 0x648a2d in zalloc util/util.h:23 #2 0x648a88 in xyarray__new util/xyarray.c:9 #3 0x566419 in perf_evsel__alloc_fd util/evsel.c:1039 #4 0x56b427 in perf_evsel__open util/evsel.c:1529 #5 0x56c620 in perf_evsel__open_per_thread util/evsel.c:1730 #6 0x461dea in create_perf_stat_counter /home/ak/hle/linux-hle-2.6/tools/perf/builtin-stat.c:263 #7 0x4637d7 in __run_perf_stat /home/ak/hle/linux-hle-2.6/tools/perf/builtin-stat.c:600 #8 0x4648e3 in run_perf_stat /home/ak/hle/linux-hle-2.6/tools/perf/builtin-stat.c:767 #9 0x46e1bc in cmd_stat /home/ak/hle/linux-hle-2.6/tools/perf/builtin-stat.c:2785 #10 0x52f83d in run_builtin /home/ak/hle/linux-hle-2.6/tools/perf/perf.c:296 #11 0x52fd49 in handle_internal_command /home/ak/hle/linux-hle-2.6/tools/perf/perf.c:348 #12 0x5300de in run_argv /home/ak/hle/linux-hle-2.6/tools/perf/perf.c:392 #13 0x5308f3 in main /home/ak/hle/linux-hle-2.6/tools/perf/perf.c:530 #14 0x7f0672d13400 in __libc_start_main (/lib64/libc.so.6+0x20400) The event is allocated with cpus == 1, but freed with cpus == real number When the evsel close function walks the file descriptors it exceeds the fd xyarray boundaries and reads random memory. v2: Now that xyarrays save their original dimensions we can use these to iterate the two dimensional fd arrays. Fix some users (close, ioctl) in evsel.c to use these fields directly. This allows simplifying the code and dropping quite a few function arguments. Adjust all callers by removing the unneeded arguments. The actual perf event reading still uses the original values from the evsel list. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170811232634.30465-2-andi@firstfloor.org [ Fix up xy_max_[xy]() -> xyarray__max_[xy]() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/openat-syscall-all-cpus.c | 2 +- tools/perf/tests/openat-syscall.c | 2 +- tools/perf/util/evlist.c | 12 ++----- tools/perf/util/evsel.c | 37 +++++++--------------- tools/perf/util/evsel.h | 7 ++-- 5 files changed, 20 insertions(+), 40 deletions(-) diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 87265117fd7f..9cf1c35f2ad0 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -115,7 +115,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int perf_evsel__free_counts(evsel); out_close_fd: - perf_evsel__close_fd(evsel, 1, threads->nr); + perf_evsel__close_fd(evsel); out_evsel_delete: perf_evsel__delete(evsel); out_thread_map_delete: diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 85bb6729d303..9dc5c5d37553 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -56,7 +56,7 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m err = 0; out_close_fd: - perf_evsel__close_fd(evsel, 1, threads->nr); + perf_evsel__close_fd(evsel); out_evsel_delete: perf_evsel__delete(evsel); out_thread_map_delete: diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 078b58511595..6a0d7ffbeba0 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1419,8 +1419,6 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e { struct perf_evsel *evsel; int err = 0; - const int ncpus = cpu_map__nr(evlist->cpus), - nthreads = thread_map__nr(evlist->threads); evlist__for_each_entry(evlist, evsel) { if (evsel->filter == NULL) @@ -1430,7 +1428,7 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e * filters only work for tracepoint event, which doesn't have cpu limit. * So evlist and evsel should always be same. */ - err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); + err = perf_evsel__apply_filter(evsel, evsel->filter); if (err) { *err_evsel = evsel; break; @@ -1623,13 +1621,9 @@ void perf_evlist__set_selected(struct perf_evlist *evlist, void perf_evlist__close(struct perf_evlist *evlist) { struct perf_evsel *evsel; - int ncpus = cpu_map__nr(evlist->cpus); - int nthreads = thread_map__nr(evlist->threads); - evlist__for_each_entry_reverse(evlist, evsel) { - int n = evsel->cpus ? evsel->cpus->nr : ncpus; - perf_evsel__close(evsel, n, nthreads); - } + evlist__for_each_entry_reverse(evlist, evsel) + perf_evsel__close(evsel); } static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3735c9e0080d..5dfb8bc4db89 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1051,16 +1051,13 @@ static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthread return evsel->fd != NULL ? 0 : -ENOMEM; } -static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthreads, +static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ioc, void *arg) { int cpu, thread; - if (evsel->system_wide) - nthreads = 1; - - for (cpu = 0; cpu < ncpus; cpu++) { - for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { int fd = FD(evsel, cpu, thread), err = ioctl(fd, ioc, arg); @@ -1072,10 +1069,9 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthrea return 0; } -int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, - const char *filter) +int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter) { - return perf_evsel__run_ioctl(evsel, ncpus, nthreads, + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_SET_FILTER, (void *)filter); } @@ -1122,20 +1118,14 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter) int perf_evsel__enable(struct perf_evsel *evsel) { - int nthreads = thread_map__nr(evsel->threads); - int ncpus = cpu_map__nr(evsel->cpus); - - return perf_evsel__run_ioctl(evsel, ncpus, nthreads, + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0); } int perf_evsel__disable(struct perf_evsel *evsel) { - int nthreads = thread_map__nr(evsel->threads); - int ncpus = cpu_map__nr(evsel->cpus); - - return perf_evsel__run_ioctl(evsel, ncpus, nthreads, + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0); } @@ -1185,15 +1175,12 @@ static void perf_evsel__free_config_terms(struct perf_evsel *evsel) } } -void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) +void perf_evsel__close_fd(struct perf_evsel *evsel) { int cpu, thread; - if (evsel->system_wide) - nthreads = 1; - - for (cpu = 0; cpu < ncpus; cpu++) - for (thread = 0; thread < nthreads; ++thread) { + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) + for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { close(FD(evsel, cpu, thread)); FD(evsel, cpu, thread) = -1; } @@ -1854,12 +1841,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, return err; } -void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads) +void perf_evsel__close(struct perf_evsel *evsel) { if (evsel->fd == NULL) return; - perf_evsel__close_fd(evsel, ncpus, nthreads); + perf_evsel__close_fd(evsel); perf_evsel__free_fd(evsel); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index de03c18daaf0..351d3b2d8887 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -226,7 +226,7 @@ const char *perf_evsel__group_name(struct perf_evsel *evsel); int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); -void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); +void perf_evsel__close_fd(struct perf_evsel *evsel); void __perf_evsel__set_sample_bit(struct perf_evsel *evsel, enum perf_event_sample_format bit); @@ -246,8 +246,7 @@ int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter); -int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, - const char *filter); +int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__enable(struct perf_evsel *evsel); int perf_evsel__disable(struct perf_evsel *evsel); @@ -257,7 +256,7 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads); int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads); -void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads); +void perf_evsel__close(struct perf_evsel *evsel); struct perf_sample; From 77d0871c76bad1093a3d86870fe76dd1ad0ca397 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 11 Aug 2017 16:26:19 -0700 Subject: [PATCH 212/253] perf bpf: Tighten detection of BPF events perf stat -e cpu/uops_executed.core,cmask=1/ would be detected as a BPF source event because the .c matches the .c source BPF pattern. v2: Originally I tried to use lex lookahead, but it doesn't seem to work. This now extends the BPF pattern to match longer events, but then does an extra check in the C code to reject BPF matches that do not end with .c/.o/.obj This uses REJECT, which makes the flex scanner slower, but that shouldn't be a big problem for the perf events. Committer testing: # perf trace -e write -e /home/acme/bpf/tracepoint.c cat /etc/passwd > /dev/null 0.000 ( 0.006 ms): cat/18485 write(fd: 1, buf: 0x7f59eebe1000, count: 3494 ) ... 0.006 ( ): raw_syscalls:sys_enter:NR 1 (1, 7f59eebe1000, da6, 22, 7f59eebe0010, 0)) 0.008 ( ): perf_bpf_probe:_write:(ffffffff9626b2c0)) 0.000 ( 0.010 ms): cat/18485 ... [continued]: write()) = 3494 # It continues doing what was expected, i.e. identifying /home/acme/bpf/tracepoint.c as a BPF event and activates the clang machinery to build an eBPF object and then uses sys_bpf() to hook it up to the raw_syscalls:sys_enter tracepoint, etc. Andi forgot to add Wang to the CC list, fix it. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Wang Nan Link: http://lkml.kernel.org/r/20170811232634.30465-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 660fca05bc93..c42edeac451f 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -53,6 +53,21 @@ static int str(yyscan_t scanner, int token) return token; } +static bool isbpf(yyscan_t scanner) +{ + char *text = parse_events_get_text(scanner); + int len = strlen(text); + + if (len < 2) + return false; + if ((text[len - 1] == 'c' || text[len - 1] == 'o') && + text[len - 2] == '.') + return true; + if (len > 4 && !strcmp(text + len - 4, ".obj")) + return true; + return false; +} + /* * This function is called when the parser gets two kind of input: * @@ -136,8 +151,8 @@ do { \ group [^,{}/]*[{][^}]*[}][^,{}/]* event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* event [^,{}/]+ -bpf_object [^,{}]+\.(o|bpf) -bpf_source [^,{}]+\.c +bpf_object [^,{}]+\.(o|bpf)[a-zA-Z0-9._]* +bpf_source [^,{}]+\.c[a-zA-Z0-9._]* num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ @@ -307,8 +322,8 @@ r{num_raw_hex} { return raw(yyscanner); } {num_hex} { return value(yyscanner, 16); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } -{bpf_object} { return str(yyscanner, PE_BPF_OBJECT); } -{bpf_source} { return str(yyscanner, PE_BPF_SOURCE); } +{bpf_object} { if (!isbpf(yyscanner)) REJECT; return str(yyscanner, PE_BPF_OBJECT); } +{bpf_source} { if (!isbpf(yyscanner)) REJECT; return str(yyscanner, PE_BPF_SOURCE); } {name} { return pmu_str_check(yyscanner); } "/" { BEGIN(config); return '/'; } - { return '-'; } From de5077c4e38f2a51f50d28bdd5e4a0f14b3d16ff Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 11 Aug 2017 16:26:22 -0700 Subject: [PATCH 213/253] perf tools: Add utility function to detect SMT status Add an smt_on() function to return if SMT is enabled or disabled. Used in the next patch. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170811232634.30465-7-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/smt.c | 44 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/smt.h | 6 ++++++ 3 files changed, 51 insertions(+) create mode 100644 tools/perf/util/smt.c create mode 100644 tools/perf/util/smt.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8d49a989f193..94518c1bf8b6 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -22,6 +22,7 @@ libperf-y += rbtree.o libperf-y += libstring.o libperf-y += bitmap.o libperf-y += hweight.o +libperf-y += smt.o libperf-y += quote.o libperf-y += strbuf.o libperf-y += string.o diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c new file mode 100644 index 000000000000..453f6f6f29f3 --- /dev/null +++ b/tools/perf/util/smt.c @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include "api/fs/fs.h" +#include "smt.h" + +int smt_on(void) +{ + static bool cached; + static int cached_result; + int cpu; + int ncpu; + + if (cached) + return cached_result; + + ncpu = sysconf(_SC_NPROCESSORS_CONF); + for (cpu = 0; cpu < ncpu; cpu++) { + unsigned long long siblings; + char *str; + size_t strlen; + char fn[256]; + + snprintf(fn, sizeof fn, + "devices/system/cpu/cpu%d/topology/thread_siblings", + cpu); + if (sysfs__read_str(fn, &str, &strlen) < 0) + continue; + /* Entry is hex, but does not have 0x, so need custom parser */ + siblings = strtoull(str, NULL, 16); + free(str); + if (hweight64(siblings) > 1) { + cached_result = 1; + cached = true; + break; + } + } + if (!cached) { + cached_result = 0; + cached = true; + } + return cached_result; +} diff --git a/tools/perf/util/smt.h b/tools/perf/util/smt.h new file mode 100644 index 000000000000..b8414b7bcbc8 --- /dev/null +++ b/tools/perf/util/smt.h @@ -0,0 +1,6 @@ +#ifndef SMT_H +#define SMT_H 1 + +int smt_on(void); + +#endif From d73bad06851b8d5745c11dd4ab789464f6c71b39 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 11 Aug 2017 16:26:23 -0700 Subject: [PATCH 214/253] perf tools: Expression parser enhancements for metrics Enhance the expression parser for more complex metric formulas. - Support python style IF ELSE operators - Add an #SMT_On magic variable for formulas that depend on the SMT status. Example: 4 *( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles - Support MIN/MAX operations Example: min(1 , IDQ.MITE_UOPS / ( UPI * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) ) This is useful to fix up problems caused by multiplexing. - Support | & ^ operators - Minor cleanups and fixes - Support an \ escape for operators. This allows to specify event names like c2-residency - Support @ as an alternative for / to be able to specify pmus without conflicts with operators (like msr/tsc/ as msr@tsc@) Example: (cstate_core@c3\\-residency@ / msr@tsc@) * 100 Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170811232634.30465-8-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 5 ++++ tools/perf/util/expr.y | 61 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index e93903295532..cb251bf523e7 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -31,6 +31,11 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) ret |= test(&ctx, "(BAR/2)%2", 1); ret |= test(&ctx, "1 - -4", 5); ret |= test(&ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5); + ret |= test(&ctx, "1-1 | 1", 1); + ret |= test(&ctx, "1-1 & 1", 0); + ret |= test(&ctx, "min(1,2) + 1", 2); + ret |= test(&ctx, "max(1,2) + 1", 3); + ret |= test(&ctx, "1+1 if 3*4 else 0", 2); if (ret) return ret; diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 953e65ba2cc7..5753c4f21534 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -4,6 +4,7 @@ #include "util/debug.h" #define IN_EXPR_Y 1 #include "expr.h" +#include "smt.h" #include #define MAXIDLEN 256 @@ -22,13 +23,15 @@ %token NUMBER %token ID +%token MIN MAX IF ELSE SMT_ON +%left MIN MAX IF %left '|' %left '^' %left '&' %left '-' '+' %left '*' '/' '%' %left NEG NOT -%type expr +%type expr if_expr %{ static int expr__lex(YYSTYPE *res, const char **pp); @@ -57,7 +60,12 @@ static int lookup_id(struct parse_ctx *ctx, char *id, double *val) %} %% -all_expr: expr { *final_val = $1; } +all_expr: if_expr { *final_val = $1; } + ; + +if_expr: + expr IF expr ELSE expr { $$ = $3 ? $1 : $5; } + | expr ; expr: NUMBER @@ -66,13 +74,19 @@ expr: NUMBER YYABORT; } } + | expr '|' expr { $$ = (long)$1 | (long)$3; } + | expr '&' expr { $$ = (long)$1 & (long)$3; } + | expr '^' expr { $$ = (long)$1 ^ (long)$3; } | expr '+' expr { $$ = $1 + $3; } | expr '-' expr { $$ = $1 - $3; } | expr '*' expr { $$ = $1 * $3; } | expr '/' expr { if ($3 == 0) YYABORT; $$ = $1 / $3; } | expr '%' expr { if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; } | '-' expr %prec NEG { $$ = -$2; } - | '(' expr ')' { $$ = $2; } + | '(' if_expr ')' { $$ = $2; } + | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; } + | MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; } + | SMT_ON { $$ = smt_on() > 0; } ; %% @@ -82,13 +96,47 @@ static int expr__symbol(YYSTYPE *res, const char *p, const char **pp) char *dst = res->id; const char *s = p; - while (isalnum(*p) || *p == '_' || *p == '.') { + if (*p == '#') + *dst++ = *p++; + + while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') { if (p - s >= MAXIDLEN) return -1; - *dst++ = *p++; + /* + * Allow @ instead of / to be able to specify pmu/event/ without + * conflicts with normal division. + */ + if (*p == '@') + *dst++ = '/'; + else if (*p == '\\') + *dst++ = *++p; + else + *dst++ = *p; + p++; } *dst = 0; *pp = p; + dst = res->id; + switch (dst[0]) { + case 'm': + if (!strcmp(dst, "min")) + return MIN; + if (!strcmp(dst, "max")) + return MAX; + break; + case 'i': + if (!strcmp(dst, "if")) + return IF; + break; + case 'e': + if (!strcmp(dst, "else")) + return ELSE; + break; + case '#': + if (!strcasecmp(dst, "#smt_on")) + return SMT_ON; + break; + } return ID; } @@ -102,6 +150,7 @@ static int expr__lex(YYSTYPE *res, const char **pp) p++; s = p; switch (*p++) { + case '#': case 'a' ... 'z': case 'A' ... 'Z': return expr__symbol(res, p - 1, pp); @@ -151,7 +200,7 @@ int expr__find_other(const char *p, const char *one, const char ***other, err = 0; break; } - if (tok == ID && strcasecmp(one, val.id)) { + if (tok == ID && (!one || strcasecmp(one, val.id))) { if (num_other >= EXPR_MAX_OTHER - 1) { pr_debug("Too many extra events in %s\n", orig); break; From 8d3db2b97f5cb22cc589ba1e8e5232a26bfeb5d6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 11 Aug 2017 16:26:24 -0700 Subject: [PATCH 215/253] perf tools: Increase maximum number of events in expressions Some of the upcoming metrics need more than 8 events. Increase the maximum number the parser supports. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170811232634.30465-9-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 9c2760a1a96e..400ef9eab00a 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -1,7 +1,7 @@ #ifndef PARSE_CTX_H #define PARSE_CTX_H 1 -#define EXPR_MAX_OTHER 8 +#define EXPR_MAX_OTHER 15 #define MAX_PARSE_ID EXPR_MAX_OTHER struct parse_id { From d66dccdb13c9748f27d8401b45c027953fc833e8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 11 Aug 2017 16:26:25 -0700 Subject: [PATCH 216/253] perf tools: Dedup events in expression parsing Avoid adding redundant events while parsing an expression. When we add an "other" event check first if it already exists. v2: Fix perf test failure. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170811232634.30465-10-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.y | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 5753c4f21534..432b8560cf51 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -181,6 +181,19 @@ void expr__ctx_init(struct parse_ctx *ctx) ctx->num_ids = 0; } +static bool already_seen(const char *val, const char *one, const char **other, + int num_other) +{ + int i; + + if (one && !strcasecmp(one, val)) + return true; + for (i = 0; i < num_other; i++) + if (!strcasecmp(other[i], val)) + return true; + return false; +} + int expr__find_other(const char *p, const char *one, const char ***other, int *num_otherp) { @@ -200,7 +213,7 @@ int expr__find_other(const char *p, const char *one, const char ***other, err = 0; break; } - if (tok == ID && (!one || strcasecmp(one, val.id))) { + if (tok == ID && !already_seen(val.id, one, *other, num_other)) { if (num_other >= EXPR_MAX_OTHER - 1) { pr_debug("Too many extra events in %s\n", orig); break; From 630171d4156a257869b3cca5b2e63aacf7bc7948 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 10 Mar 2017 15:23:13 -0800 Subject: [PATCH 217/253] perf vendor events: Add core event list for Skylake Server Based on JSON list version v1.01 Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/3269ae458a883139110ec82bc895423bd8843d65 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/mapfile.csv | 1 + .../pmu-events/arch/x86/skylakex/cache.json | 1672 +++++++++++++++++ .../arch/x86/skylakex/floating-point.json | 88 + .../arch/x86/skylakex/frontend.json | 482 +++++ .../pmu-events/arch/x86/skylakex/memory.json | 1396 ++++++++++++++ .../pmu-events/arch/x86/skylakex/other.json | 72 + .../arch/x86/skylakex/pipeline.json | 950 ++++++++++ .../arch/x86/skylakex/virtual-memory.json | 284 +++ 8 files changed, 4945 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/floating-point.json create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/frontend.json create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/other.json create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/pipeline.json create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index d1a12e584c1b..4ea068366c3e 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -34,3 +34,4 @@ GenuineIntel-6-2C,v2,westmereep-dp,core GenuineIntel-6-2C,v2,westmereep-dp,core GenuineIntel-6-25,v2,westmereep-sp,core GenuineIntel-6-2F,v2,westmereex,core +GenuineIntel-6-55,v1,skylakex,core diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json new file mode 100644 index 000000000000..b5bc742b6fbc --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json @@ -0,0 +1,1672 @@ +[ + { + "EventCode": "0x24", + "UMask": "0x21", + "BriefDescription": "Demand Data Read miss L2, no rejects", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", + "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x22", + "BriefDescription": "RFO requests that miss L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x24", + "BriefDescription": "L2 cache misses when fetching instructions", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x27", + "BriefDescription": "Demand requests that miss L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "PublicDescription": "Demand requests that miss L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x38", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.PF_MISS", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x3f", + "BriefDescription": "All requests that miss L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.MISS", + "PublicDescription": "All requests that miss L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x41", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. Only non rejected loads are counted.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x42", + "BriefDescription": "RFO requests that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x44", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0xd8", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.PF_HIT", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0xe1", + "BriefDescription": "Demand Data Read requests", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0xe2", + "BriefDescription": "RFO requests to L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_RFO", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0xe4", + "BriefDescription": "L2 code requests", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "PublicDescription": "Counts the total number of L2 code requests.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0xe7", + "BriefDescription": "Demand requests to L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "PublicDescription": "Demand requests to L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0xf8", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_PF", + "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0xff", + "BriefDescription": "All L2 requests", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.REFERENCES", + "PublicDescription": "All L2 requests.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x2E", + "UMask": "0x41", + "BriefDescription": "Core-originated cacheable demand requests missed L3", + "Counter": "0,1,2,3", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x2E", + "UMask": "0x4f", + "BriefDescription": "Core-originated cacheable demand requests that refer to L3", + "Counter": "0,1,2,3", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "L1D miss outstandings duration in cycles", + "Counter": "0,1,2,3", + "EventName": "L1D_PEND_MISS.PENDING", + "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "Counter": "0,1,2,3", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "AnyThread": "1", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x48", + "UMask": "0x2", + "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", + "Counter": "0,1,2,3", + "EventName": "L1D_PEND_MISS.FB_FULL", + "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x51", + "UMask": "0x1", + "BriefDescription": "L1D data line replacements", + "Counter": "0,1,2,3", + "EventName": "L1D.REPLACEMENT", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "CounterMask": "1", + "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "CounterMask": "6", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x2", + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle. ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x2", + "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "CounterMask": "1", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x4", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x4", + "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "CounterMask": "1", + "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x8", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x8", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "CounterMask": "1", + "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB0", + "UMask": "0x1", + "BriefDescription": "Demand Data Read requests sent to uncore", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB0", + "UMask": "0x2", + "BriefDescription": "Cacheable and noncachaeble code read requests", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", + "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB0", + "UMask": "0x4", + "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", + "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB0", + "UMask": "0x8", + "BriefDescription": "Demand and prefetch data reads", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB0", + "UMask": "0x80", + "BriefDescription": "Any memory transaction that reached the SQ.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", + "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB2", + "UMask": "0x1", + "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", + "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD0", + "UMask": "0x11", + "BriefDescription": "Retired load instructions that miss the STLB.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD0", + "UMask": "0x12", + "BriefDescription": "Retired store instructions that miss the STLB.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "SampleAfterValue": "100003", + "L1_Hit_Indication": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD0", + "UMask": "0x21", + "BriefDescription": "Retired load instructions with locked access.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_INST_RETIRED.LOCK_LOADS", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD0", + "UMask": "0x41", + "BriefDescription": "Retired load instructions that split across a cacheline boundary.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", + "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD0", + "UMask": "0x42", + "BriefDescription": "Retired store instructions that split across a cacheline boundary.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", + "SampleAfterValue": "100003", + "L1_Hit_Indication": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD0", + "UMask": "0x81", + "BriefDescription": "All retired load instructions.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD0", + "UMask": "0x82", + "BriefDescription": "All retired store instructions.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "SampleAfterValue": "2000003", + "L1_Hit_Indication": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD1", + "UMask": "0x1", + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD1", + "UMask": "0x2", + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", + "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD1", + "UMask": "0x4", + "BriefDescription": "Retired load instructions with L3 cache hits as data sources", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_RETIRED.L3_HIT", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. ", + "SampleAfterValue": "50021", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD1", + "UMask": "0x8", + "BriefDescription": "Retired load instructions missed L1 cache as data sources", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD1", + "UMask": "0x10", + "BriefDescription": "Retired load instructions missed L2 cache as data sources", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "PublicDescription": "Retired load instructions missed L2 cache as data sources.", + "SampleAfterValue": "50021", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD1", + "UMask": "0x20", + "BriefDescription": "Retired load instructions missed L3 cache as data sources", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. ", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD1", + "UMask": "0x40", + "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. ", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD2", + "UMask": "0x1", + "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "SampleAfterValue": "20011", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD2", + "UMask": "0x2", + "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", + "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "SampleAfterValue": "20011", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD2", + "UMask": "0x4", + "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", + "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD2", + "UMask": "0x8", + "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", + "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD3", + "UMask": "0x1", + "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", + "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD3", + "UMask": "0x2", + "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD3", + "UMask": "0x4", + "BriefDescription": "Retired load instructions whose data sources was remote HITM", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM", + "PublicDescription": "Retired load instructions whose data sources was remote HITM.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD3", + "UMask": "0x8", + "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache", + "Data_LA": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD", + "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD4", + "UMask": "0x4", + "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xF0", + "UMask": "0x40", + "BriefDescription": "L2 writebacks that access L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_TRANS.L2_WB", + "PublicDescription": "Counts L2 writebacks that access L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xF1", + "UMask": "0x1f", + "BriefDescription": "L2 cache lines filling L2", + "Counter": "0,1,2,3", + "EventName": "L2_LINES_IN.ALL", + "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xF2", + "UMask": "0x1", + "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.", + "Counter": "0,1,2,3", + "EventName": "L2_LINES_OUT.SILENT", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xF2", + "UMask": "0x2", + "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped ", + "Counter": "0,1,2,3", + "EventName": "L2_LINES_OUT.NON_SILENT", + "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xF2", + "UMask": "0x4", + "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_LINES_OUT.USELESS_PREF", + "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xF2", + "UMask": "0x4", + "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_LINES_OUT.USELESS_HWPF", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xF4", + "UMask": "0x10", + "BriefDescription": "Number of cache line split locks sent to uncore.", + "Counter": "0,1,2,3", + "EventName": "SQ_MISC.SPLIT_LOCK", + "PublicDescription": "Counts the number of cache line split locks sent to the uncore.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that have any response type.", + "MSRValue": "0x0000010001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that hit in the L3.", + "MSRValue": "0x3f803c0001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.", + "MSRValue": "0x0000010002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.", + "MSRValue": "0x3f803c0002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that have any response type.", + "MSRValue": "0x0000010004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that hit in the L3.", + "MSRValue": "0x3f803c0004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.", + "MSRValue": "0x0000010010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "PF_L2_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.", + "MSRValue": "0x3f803c0010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.", + "MSRValue": "0x0000010020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "PF_L2_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.", + "MSRValue": "0x3f803c0020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.", + "MSRValue": "0x0000010080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.", + "MSRValue": "0x3f803c0080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.", + "MSRValue": "0x0000010100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.", + "MSRValue": "0x3f803c0100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.", + "MSRValue": "0x0000010400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "PF_L1D_AND_SW & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.", + "MSRValue": "0x3f803c0400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that have any response type.", + "MSRValue": "0x0000010490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that hit in the L3.", + "MSRValue": "0x3f803c0490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that have any response type.", + "MSRValue": "0x0000010120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that hit in the L3.", + "MSRValue": "0x3f803c0120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that have any response type.", + "MSRValue": "0x0000010491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.", + "MSRValue": "0x3f803c0491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.", + "MSRValue": "0x0000010122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c0122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c0122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c0122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c0122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.", + "MSRValue": "0x3f803c0122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json new file mode 100644 index 000000000000..1c09a328df36 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json @@ -0,0 +1,88 @@ +[ + { + "EventCode": "0xC7", + "UMask": "0x1", + "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC7", + "UMask": "0x2", + "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC7", + "UMask": "0x4", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC7", + "UMask": "0x8", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC7", + "UMask": "0x10", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC7", + "UMask": "0x20", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC7", + "UMask": "0x40", + "BriefDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8)", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE", + "PublicDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC7", + "UMask": "0x80", + "BriefDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16)", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE", + "PublicDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCA", + "UMask": "0x1e", + "BriefDescription": "Cycles with any input/output SSE or FP assist", + "Counter": "0,1,2,3", + "EventName": "FP_ASSIST.ANY", + "CounterMask": "1", + "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json new file mode 100644 index 000000000000..40abc0852cd6 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json @@ -0,0 +1,482 @@ +[ + { + "EventCode": "0x79", + "UMask": "0x4", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", + "Counter": "0,1,2,3", + "EventName": "IDQ.MITE_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x4", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "Counter": "0,1,2,3", + "EventName": "IDQ.MITE_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x8", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "Counter": "0,1,2,3", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x8", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", + "Counter": "0,1,2,3", + "EventName": "IDQ.DSB_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x10", + "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_DSB_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x18", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", + "Counter": "0,1,2,3", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", + "CounterMask": "4", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x18", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "Counter": "0,1,2,3", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "CounterMask": "1", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x20", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x24", + "BriefDescription": "Cycles MITE is delivering 4 Uops", + "Counter": "0,1,2,3", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", + "CounterMask": "4", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x24", + "BriefDescription": "Cycles MITE is delivering any Uop", + "Counter": "0,1,2,3", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "CounterMask": "1", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EdgeDetect": "1", + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_SWITCHES", + "CounterMask": "1", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x80", + "UMask": "0x4", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", + "Counter": "0,1,2,3", + "EventName": "ICACHE_16B.IFDATA_STALL", + "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x83", + "UMask": "0x1", + "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "Counter": "0,1,2,3", + "EventName": "ICACHE_64B.IFTAG_HIT", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x83", + "UMask": "0x2", + "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "Counter": "0,1,2,3", + "EventName": "ICACHE_64B.IFTAG_MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x83", + "UMask": "0x4", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "Counter": "0,1,2,3", + "EventName": "ICACHE_64B.IFTAG_STALL", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x9C", + "UMask": "0x1", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", + "Counter": "0,1,2,3", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x9C", + "UMask": "0x1", + "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", + "Counter": "0,1,2,3", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "CounterMask": "4", + "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x9C", + "UMask": "0x1", + "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", + "Counter": "0,1,2,3", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "CounterMask": "3", + "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x9C", + "UMask": "0x1", + "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", + "Counter": "0,1,2,3", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", + "CounterMask": "2", + "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x9C", + "UMask": "0x1", + "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", + "Counter": "0,1,2,3", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", + "CounterMask": "1", + "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "Invert": "1", + "EventCode": "0x9C", + "UMask": "0x1", + "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", + "Counter": "0,1,2,3", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xAB", + "UMask": "0x2", + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", + "Counter": "0,1,2,3", + "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", + "PEBS": "1", + "MSRValue": "0x11", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.DSB_MISS", + "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. ", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", + "PEBS": "1", + "MSRValue": "0x12", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.L1I_MISS", + "MSRIndex": "0x3F7", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", + "PEBS": "1", + "MSRValue": "0x13", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.L2_MISS", + "MSRIndex": "0x3F7", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired Instructions who experienced iTLB true miss.", + "PEBS": "1", + "MSRValue": "0x14", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.ITLB_MISS", + "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", + "PEBS": "1", + "MSRValue": "0x15", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.STLB_MISS", + "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. ", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x400206", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", + "MSRIndex": "0x3F7", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x200206", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", + "MSRIndex": "0x3F7", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x400406", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", + "MSRIndex": "0x3F7", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x400806", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", + "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x401006", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", + "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x402006", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", + "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x404006", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "MSRIndex": "0x3F7", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x408006", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", + "MSRIndex": "0x3F7", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x410006", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", + "MSRIndex": "0x3F7", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x420006", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", + "MSRIndex": "0x3F7", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x100206", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", + "MSRIndex": "0x3F7", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC6", + "UMask": "0x1", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "PEBS": "1", + "MSRValue": "0x300206", + "Counter": "0,1,2,3", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", + "MSRIndex": "0x3F7", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json new file mode 100644 index 000000000000..ca22a22c1abd --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json @@ -0,0 +1,1396 @@ +[ + { + "EventCode": "0x54", + "UMask": "0x1", + "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", + "Counter": "0,1,2,3", + "EventName": "TX_MEM.ABORT_CONFLICT", + "PublicDescription": "Number of times a TSX line had a cache conflict.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x54", + "UMask": "0x2", + "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", + "Counter": "0,1,2,3", + "EventName": "TX_MEM.ABORT_CAPACITY", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x54", + "UMask": "0x4", + "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", + "Counter": "0,1,2,3", + "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", + "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x54", + "UMask": "0x8", + "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", + "Counter": "0,1,2,3", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", + "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x54", + "UMask": "0x10", + "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", + "Counter": "0,1,2,3", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", + "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x54", + "UMask": "0x20", + "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", + "Counter": "0,1,2,3", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", + "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x54", + "UMask": "0x40", + "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", + "Counter": "0,1,2,3", + "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", + "PublicDescription": "Number of times we could not allocate Lock Buffer.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x5d", + "UMask": "0x1", + "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", + "Counter": "0,1,2,3", + "EventName": "TX_EXEC.MISC1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x5d", + "UMask": "0x2", + "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region", + "Counter": "0,1,2,3", + "EventName": "TX_EXEC.MISC2", + "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x5d", + "UMask": "0x4", + "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded", + "Counter": "0,1,2,3", + "EventName": "TX_EXEC.MISC3", + "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x5d", + "UMask": "0x8", + "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.", + "Counter": "0,1,2,3", + "EventName": "TX_EXEC.MISC4", + "PublicDescription": "RTM region detected inside HLE.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x5d", + "UMask": "0x10", + "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region", + "Counter": "0,1,2,3", + "EventName": "TX_EXEC.MISC5", + "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x10", + "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x10", + "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x10", + "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", + "CounterMask": "6", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0x2", + "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", + "CounterMask": "2", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0x6", + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "CounterMask": "6", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB0", + "UMask": "0x10", + "BriefDescription": "Demand Data Read requests who miss L3 cache", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "Demand Data Read requests who miss L3 cache.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC3", + "UMask": "0x2", + "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", + "Counter": "0,1,2,3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "Errata": "SKL089", + "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC8", + "UMask": "0x1", + "BriefDescription": "Number of times an HLE execution started.", + "Counter": "0,1,2,3", + "EventName": "HLE_RETIRED.START", + "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC8", + "UMask": "0x2", + "BriefDescription": "Number of times an HLE execution successfully committed", + "Counter": "0,1,2,3", + "EventName": "HLE_RETIRED.COMMIT", + "PublicDescription": "Number of times HLE commit succeeded.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC8", + "UMask": "0x4", + "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one). ", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "HLE_RETIRED.ABORTED", + "PublicDescription": "Number of times HLE abort was triggered.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC8", + "UMask": "0x8", + "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", + "Counter": "0,1,2,3", + "EventName": "HLE_RETIRED.ABORTED_MEM", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC8", + "UMask": "0x10", + "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.", + "Counter": "0,1,2,3", + "EventName": "HLE_RETIRED.ABORTED_TIMER", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC8", + "UMask": "0x20", + "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.). ", + "Counter": "0,1,2,3", + "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", + "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC8", + "UMask": "0x40", + "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type", + "Counter": "0,1,2,3", + "EventName": "HLE_RETIRED.ABORTED_MEMTYPE", + "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC8", + "UMask": "0x80", + "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).", + "Counter": "0,1,2,3", + "EventName": "HLE_RETIRED.ABORTED_EVENTS", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC9", + "UMask": "0x1", + "BriefDescription": "Number of times an RTM execution started.", + "Counter": "0,1,2,3", + "EventName": "RTM_RETIRED.START", + "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC9", + "UMask": "0x2", + "BriefDescription": "Number of times an RTM execution successfully committed", + "Counter": "0,1,2,3", + "EventName": "RTM_RETIRED.COMMIT", + "PublicDescription": "Number of times RTM commit succeeded.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC9", + "UMask": "0x4", + "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one). ", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "RTM_RETIRED.ABORTED", + "PublicDescription": "Number of times RTM abort was triggered.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC9", + "UMask": "0x8", + "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)", + "Counter": "0,1,2,3", + "EventName": "RTM_RETIRED.ABORTED_MEM", + "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC9", + "UMask": "0x10", + "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.", + "Counter": "0,1,2,3", + "EventName": "RTM_RETIRED.ABORTED_TIMER", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC9", + "UMask": "0x20", + "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions", + "Counter": "0,1,2,3", + "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY", + "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC9", + "UMask": "0x40", + "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type", + "Counter": "0,1,2,3", + "EventName": "RTM_RETIRED.ABORTED_MEMTYPE", + "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC9", + "UMask": "0x80", + "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", + "Counter": "0,1,2,3", + "EventName": "RTM_RETIRED.ABORTED_EVENTS", + "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCD", + "UMask": "0x1", + "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.", + "PEBS": "2", + "MSRValue": "0x4", + "Counter": "0,1,2,3", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", + "TakenAlone": "1", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xCD", + "UMask": "0x1", + "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.", + "PEBS": "2", + "MSRValue": "0x8", + "Counter": "0,1,2,3", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", + "TakenAlone": "1", + "SampleAfterValue": "50021", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xCD", + "UMask": "0x1", + "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.", + "PEBS": "2", + "MSRValue": "0x10", + "Counter": "0,1,2,3", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", + "TakenAlone": "1", + "SampleAfterValue": "20011", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xCD", + "UMask": "0x1", + "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.", + "PEBS": "2", + "MSRValue": "0x20", + "Counter": "0,1,2,3", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", + "TakenAlone": "1", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xCD", + "UMask": "0x1", + "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.", + "PEBS": "2", + "MSRValue": "0x40", + "Counter": "0,1,2,3", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", + "TakenAlone": "1", + "SampleAfterValue": "2003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xCD", + "UMask": "0x1", + "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.", + "PEBS": "2", + "MSRValue": "0x80", + "Counter": "0,1,2,3", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", + "TakenAlone": "1", + "SampleAfterValue": "1009", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xCD", + "UMask": "0x1", + "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.", + "PEBS": "2", + "MSRValue": "0x100", + "Counter": "0,1,2,3", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", + "TakenAlone": "1", + "SampleAfterValue": "503", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xCD", + "UMask": "0x1", + "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.", + "PEBS": "2", + "MSRValue": "0x200", + "Counter": "0,1,2,3", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "TakenAlone": "1", + "SampleAfterValue": "101", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that miss in the L3.", + "MSRValue": "0x3fbc000001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000001 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.", + "MSRValue": "0x3fbc000002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000002 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that miss in the L3.", + "MSRValue": "0x3fbc000004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000004 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.", + "MSRValue": "0x3fbc000010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000010 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.", + "MSRValue": "0x3fbc000020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000020 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.", + "MSRValue": "0x3fbc000080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000080 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.", + "MSRValue": "0x3fbc000100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000100 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.", + "MSRValue": "0x3fbc000400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000400 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that miss in the L3.", + "MSRValue": "0x3fbc000490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000490 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that miss in the L3.", + "MSRValue": "0x3fbc000120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000120 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.", + "MSRValue": "0x3fbc000491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000491 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.", + "MSRValue": "0x3fbc000122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc00122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc00122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc00122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b800122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604000122 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + } +] diff --git a/tools/perf/pmu-events/arch/x86/skylakex/other.json b/tools/perf/pmu-events/arch/x86/skylakex/other.json new file mode 100644 index 000000000000..70243b0b0586 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/other.json @@ -0,0 +1,72 @@ +[ + { + "EventCode": "0x28", + "UMask": "0x7", + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.", + "Counter": "0,1,2,3", + "EventName": "CORE_POWER.LVL0_TURBO_LICENSE", + "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x28", + "UMask": "0x18", + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.", + "Counter": "0,1,2,3", + "EventName": "CORE_POWER.LVL1_TURBO_LICENSE", + "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x28", + "UMask": "0x20", + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.", + "Counter": "0,1,2,3", + "EventName": "CORE_POWER.LVL2_TURBO_LICENSE", + "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x28", + "UMask": "0x40", + "BriefDescription": "Core cycles the core was throttled due to a pending power level request.", + "Counter": "0,1,2,3", + "EventName": "CORE_POWER.THROTTLE", + "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCB", + "UMask": "0x1", + "BriefDescription": "Number of hardware interrupts received by the processor.", + "Counter": "0,1,2,3", + "EventName": "HW_INTERRUPTS.RECEIVED", + "PublicDescription": "Counts the number of hardware interruptions received by the processor.", + "SampleAfterValue": "203", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xFE", + "UMask": "0x2", + "BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly", + "Counter": "0,1,2,3", + "EventName": "IDI_MISC.WB_UPGRADE", + "PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xFE", + "UMask": "0x4", + "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly", + "Counter": "0,1,2,3", + "EventName": "IDI_MISC.WB_DOWNGRADE", + "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + } +] diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json new file mode 100644 index 000000000000..0895d1e52a4a --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -0,0 +1,950 @@ +[ + { + "EventCode": "0x00", + "UMask": "0x1", + "BriefDescription": "Instructions retired from execution.", + "Counter": "Fixed counter 1", + "EventName": "INST_RETIRED.ANY", + "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", + "SampleAfterValue": "2000003", + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "UMask": "0x2", + "BriefDescription": "Core cycles when the thread is not in halt state", + "Counter": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", + "SampleAfterValue": "2000003", + "CounterHTOff": "Fixed counter 2" + }, + { + "EventCode": "0x00", + "UMask": "0x2", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "Fixed counter 2" + }, + { + "EventCode": "0x00", + "UMask": "0x3", + "BriefDescription": "Reference cycles when the core is not in halt state.", + "Counter": "Fixed counter 3", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "SampleAfterValue": "2000003", + "CounterHTOff": "Fixed counter 3" + }, + { + "EventCode": "0x03", + "UMask": "0x2", + "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .", + "Counter": "0,1,2,3", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x03", + "UMask": "0x8", + "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", + "Counter": "0,1,2,3", + "EventName": "LD_BLOCKS.NO_SR", + "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x07", + "UMask": "0x1", + "BriefDescription": "False dependencies in MOB due to partial compare on address.", + "Counter": "0,1,2,3", + "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", + "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x0D", + "UMask": "0x1", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", + "Counter": "0,1,2,3", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x0D", + "UMask": "0x1", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "Counter": "0,1,2,3", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x0D", + "UMask": "0x80", + "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.", + "Counter": "0,1,2,3", + "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x0E", + "UMask": "0x1", + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "Invert": "1", + "EventCode": "0x0E", + "UMask": "0x1", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x0E", + "UMask": "0x2", + "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x0E", + "UMask": "0x20", + "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.SLOW_LEA", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x14", + "UMask": "0x1", + "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", + "Counter": "0,1,2,3", + "EventName": "ARITH.DIVIDER_ACTIVE", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Thread cycles when thread is not in halt state", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EdgeDetect": "1", + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", + "CounterMask": "1", + "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", + "SampleAfterValue": "2503", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "SampleAfterValue": "2503", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2503", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "SampleAfterValue": "2503", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x2", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x2", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2503", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x4C", + "UMask": "0x1", + "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", + "Counter": "0,1,2,3", + "EventName": "LOAD_HIT_PRE.SW_PF", + "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x5E", + "UMask": "0x1", + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", + "Counter": "0,1,2,3", + "EventName": "RS_EVENTS.EMPTY_CYCLES", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EdgeDetect": "1", + "Invert": "1", + "EventCode": "0x5E", + "UMask": "0x1", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "Counter": "0,1,2,3", + "EventName": "RS_EVENTS.EMPTY_END", + "CounterMask": "1", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x87", + "UMask": "0x1", + "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "Counter": "0,1,2,3", + "EventName": "ILD_STALL.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x1", + "BriefDescription": "Cycles per thread when uops are executed in port 0", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "UMask": "0x1", + "BriefDescription": "Resource-related stall cycles", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.ANY", + "PublicDescription": "Counts resource-related stall cycles. Reasons for stalls can be as follows:a. *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots).b. *any* u-arch structure got empty (like INT/SIMD FreeLists).c. FPU control word (FPCW), MXCSR.and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "UMask": "0x8", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0x1", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0x4", + "BriefDescription": "Total execution stalls.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "CounterMask": "4", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0x5", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "CounterMask": "5", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "CounterMask": "8", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0xc", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "CounterMask": "12", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0x10", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "CounterMask": "16", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0x14", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "CounterMask": "20", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA6", + "UMask": "0x1", + "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.", + "Counter": "0,1,2,3", + "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", + "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA6", + "UMask": "0x2", + "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", + "Counter": "0,1,2,3", + "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA6", + "UMask": "0x4", + "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", + "Counter": "0,1,2,3", + "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA6", + "UMask": "0x8", + "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", + "Counter": "0,1,2,3", + "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", + "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA6", + "UMask": "0x10", + "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", + "Counter": "0,1,2,3", + "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", + "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA6", + "UMask": "0x40", + "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.", + "Counter": "0,1,2,3", + "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Number of Uops delivered by the LSD.", + "Counter": "0,1,2,3", + "EventName": "LSD.UOPS", + "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "Counter": "0,1,2,3", + "EventName": "LSD.CYCLES_ACTIVE", + "CounterMask": "1", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "Counter": "0,1,2,3", + "EventName": "LSD.CYCLES_4_UOPS", + "CounterMask": "4", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "Invert": "1", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "CounterMask": "1", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "CounterMask": "2", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "CounterMask": "3", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "CounterMask": "4", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x2", + "BriefDescription": "Number of uops executed on the core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Number of uops executed from any thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "CounterMask": "2", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "CounterMask": "3", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterMask": "4", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "Invert": "1", + "EventCode": "0xB1", + "UMask": "0x2", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB1", + "UMask": "0x10", + "BriefDescription": "Counts the number of x87 uops dispatched.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.X87", + "PublicDescription": "Counts the number of x87 uops executed.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC0", + "UMask": "0x0", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "Counter": "0,1,2,3", + "EventName": "INST_RETIRED.ANY_P", + "Errata": "SKL091, SKL044", + "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC0", + "UMask": "0x1", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "PEBS": "2", + "Counter": "1", + "EventName": "INST_RETIRED.PREC_DIST", + "Errata": "SKL091, SKL044", + "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", + "SampleAfterValue": "2000003", + "CounterHTOff": "1" + }, + { + "Invert": "1", + "EventCode": "0xC0", + "UMask": "0x1", + "BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.", + "PEBS": "2", + "Counter": "0,2,3", + "EventName": "INST_RETIRED.TOTAL_CYCLES_PS", + "CounterMask": "10", + "Errata": "SKL091, SKL044", + "PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,2,3" + }, + { + "EventCode": "0xC1", + "UMask": "0x3f", + "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", + "Counter": "0,1,2,3", + "EventName": "OTHER_ASSISTS.ANY", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC2", + "UMask": "0x2", + "BriefDescription": "Retirement slots used.", + "Counter": "0,1,2,3", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "Counts the retirement slots used.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "Invert": "1", + "EventCode": "0xC2", + "UMask": "0x2", + "BriefDescription": "Cycles without actually retired uops.", + "Counter": "0,1,2,3", + "EventName": "UOPS_RETIRED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "Invert": "1", + "EventCode": "0xC2", + "UMask": "0x2", + "BriefDescription": "Cycles with less than 10 actually retired uops.", + "Counter": "0,1,2,3", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "CounterMask": "10", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EdgeDetect": "1", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Number of machine clears (nukes) of any type. ", + "Counter": "0,1,2,3", + "EventName": "MACHINE_CLEARS.COUNT", + "CounterMask": "1", + "PublicDescription": "Number of machine clears (nukes) of any type.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC3", + "UMask": "0x4", + "BriefDescription": "Self-modifying code (SMC) detected.", + "Counter": "0,1,2,3", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC4", + "UMask": "0x0", + "BriefDescription": "All (macro) branch instructions retired.", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "Errata": "SKL091", + "PublicDescription": "Counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC4", + "UMask": "0x1", + "BriefDescription": "Conditional branch instructions retired.", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "Errata": "SKL091", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC4", + "UMask": "0x2", + "BriefDescription": "Direct and indirect near call instructions retired.", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "Errata": "SKL091", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC4", + "UMask": "0x4", + "BriefDescription": "All (macro) branch instructions retired. ", + "PEBS": "2", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "Errata": "SKL091", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC4", + "UMask": "0x8", + "BriefDescription": "Return instructions retired.", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "Errata": "SKL091", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC4", + "UMask": "0x10", + "BriefDescription": "Not taken branch instructions retired.", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "Errata": "SKL091", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC4", + "UMask": "0x20", + "BriefDescription": "Taken branch instructions retired.", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "Errata": "SKL091", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC4", + "UMask": "0x40", + "BriefDescription": "Far branch instructions retired.", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "Errata": "SKL091", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC5", + "UMask": "0x0", + "BriefDescription": "All mispredicted macro branch instructions retired.", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC5", + "UMask": "0x1", + "BriefDescription": "Mispredicted conditional branch instructions retired.", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC5", + "UMask": "0x2", + "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.NEAR_CALL", + "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC5", + "UMask": "0x4", + "BriefDescription": "Mispredicted macro branch instructions retired. ", + "PEBS": "2", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC5", + "UMask": "0x20", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xCC", + "UMask": "0x20", + "BriefDescription": "Increments whenever there is an update to the LBR array.", + "Counter": "0,1,2,3", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xE6", + "UMask": "0x1", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "Counter": "0,1,2,3", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json new file mode 100644 index 000000000000..70750dab7ead --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json @@ -0,0 +1,284 @@ +[ + { + "EventCode": "0x08", + "UMask": "0x1", + "BriefDescription": "Load misses in all DTLB levels that cause page walks", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x08", + "UMask": "0x2", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts demand data loads that caused a completed page walk (4K page size). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x08", + "UMask": "0x4", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts demand data loads that caused a completed page walk (2M and 4M page sizes). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x08", + "UMask": "0x8", + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts load misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x08", + "UMask": "0xe", + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x08", + "UMask": "0x10", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake. ", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture. ", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x08", + "UMask": "0x10", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake. ", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "CounterMask": "1", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x08", + "UMask": "0x20", + "BriefDescription": "Loads that miss the DTLB and hit the STLB.", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x49", + "UMask": "0x1", + "BriefDescription": "Store misses in all DTLB levels that cause page walks", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x49", + "UMask": "0x2", + "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts demand data stores that caused a completed page walk (4K page size). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x49", + "UMask": "0x4", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts demand data stores that caused a completed page walk (2M and 4M page sizes). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x49", + "UMask": "0x8", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x49", + "UMask": "0xe", + "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x49", + "UMask": "0x10", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake. ", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture. ", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x49", + "UMask": "0x10", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake. ", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "CounterMask": "1", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x49", + "UMask": "0x20", + "BriefDescription": "Stores that miss the DTLB and hit the STLB.", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x4F", + "UMask": "0x10", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", + "Counter": "0,1,2,3", + "EventName": "EPT.WALK_PENDING", + "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", + "UMask": "0x1", + "BriefDescription": "Misses at all ITLB levels that cause page walks", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", + "UMask": "0x2", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", + "UMask": "0x4", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts completed page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", + "UMask": "0x8", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", + "UMask": "0xe", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", + "UMask": "0x10", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake. ", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture. ", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", + "UMask": "0x10", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_ACTIVE", + "CounterMask": "1", + "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", + "UMask": "0x20", + "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xAE", + "UMask": "0x1", + "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", + "Counter": "0,1,2,3", + "EventName": "ITLB.ITLB_FLUSH", + "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xBD", + "UMask": "0x1", + "BriefDescription": "DTLB flush attempts of the thread-specific entries", + "Counter": "0,1,2,3", + "EventName": "TLB_FLUSH.DTLB_THREAD", + "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xBD", + "UMask": "0x20", + "BriefDescription": "STLB flush attempts", + "Counter": "0,1,2,3", + "EventName": "TLB_FLUSH.STLB_ANY", + "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" + } +] \ No newline at end of file From 41d3d6db1767326dd7daf7c6df48e42020647c15 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 10 Mar 2017 12:51:38 -0800 Subject: [PATCH 218/253] perf vendor events: Add Skylake server uncore event list Add JSON uncore events for Skylake Server to perf. Based on JSON list V1.01 This is a much fuller list than with earlier uncores, including more low level (but also harder to understand) events. It does not include the "experimential" events. The previous high level metric (LLC_* etc.) are still available when applicable. C state power events are not included at this point. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170816220553.GA19463@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/skylakex/uncore-memory.json | 172 +++ .../arch/x86/skylakex/uncore-other.json | 1156 +++++++++++++++++ 2 files changed, 1328 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json new file mode 100644 index 000000000000..9c7e5f8beee2 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json @@ -0,0 +1,172 @@ +[ + { + "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", + "Counter": "0,1,2,3", + "EventCode": "0x4", + "EventName": "LLC_MISSES.MEM_READ", + "PerPkg": "1", + "ScaleUnit": "64Bytes", + "UMask": "0x3", + "Unit": "iMC" + }, + { + "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", + "Counter": "0,1,2,3", + "EventCode": "0x4", + "EventName": "LLC_MISSES.MEM_WRITE", + "PerPkg": "1", + "ScaleUnit": "64Bytes", + "UMask": "0xC", + "Unit": "iMC" + }, + { + "BriefDescription": "Memory controller clock ticks", + "Counter": "0,1,2,3", + "EventName": "UNC_M_CLOCKTICKS", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode", + "Counter": "0,1,2,3", + "EventCode": "0x85", + "EventName": "UNC_M_POWER_CHANNEL_PPD", + "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_channel_ppd %", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Cycles Memory is in self refresh power mode", + "Counter": "0,1,2,3", + "EventCode": "0x43", + "EventName": "UNC_M_POWER_SELF_REFRESH", + "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.", + "MetricName": "power_self_refresh %", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Pre-charges due to page misses", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UNC_M_PRE_COUNT.PAGE_MISS", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "iMC" + }, + { + "BriefDescription": "Pre-charge for reads", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UNC_M_PRE_COUNT.RD", + "PerPkg": "1", + "UMask": "0x4", + "Unit": "iMC" + }, + { + "BriefDescription": "Pre-charge for writes", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UNC_M_PRE_COUNT.WR", + "PerPkg": "1", + "UMask": "0x8", + "Unit": "iMC" + }, + { + "BriefDescription": "DRAM Page Activate commands sent due to a write request", + "Counter": "0,1,2,3", + "EventCode": "0x1", + "EventName": "UNC_M_ACT_COUNT.WR", + "PerPkg": "1", + "PublicDescription": "Counts DRAM Page Activate commands sent on this channel due to a write request to the iMC (Memory Controller). Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS (Column Access Select) command.", + "UMask": "0x2", + "Unit": "iMC" + }, + { + "BriefDescription": "All DRAM CAS Commands issued", + "Counter": "0,1,2,3", + "EventCode": "0x4", + "EventName": "UNC_M_CAS_COUNT.ALL", + "PerPkg": "1", + "PublicDescription": "Counts all CAS (Column Address Select) commands issued to DRAM per memory channel. CAS commands are issued to specify the address to read or write on DRAM, so this event increments for every read and write. This event counts whether AutoPrecharge (which closes the DRAM Page automatically after a read/write) is enabled or not.", + "UMask": "0xF", + "Unit": "iMC" + }, + { + "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", + "Counter": "0,1,2,3", + "EventCode": "0x4", + "EventName": "LLC_MISSES.MEM_READ", + "PerPkg": "1", + "ScaleUnit": "64Bytes", + "UMask": "0x3", + "Unit": "iMC" + }, + { + "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills) ", + "Counter": "0,1,2,3", + "EventCode": "0x4", + "EventName": "UNC_M_CAS_COUNT.RD_REG", + "PerPkg": "1", + "PublicDescription": "Counts CAS (Column Access Select) regular read commands issued to DRAM on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this event increments for every regular read. This event only counts regular reads and does not includes underfill reads due to partial write requests. This event counts whether AutoPrecharge (which closes the DRAM Page automatically after a read/write) is enabled or not.", + "UMask": "0x1", + "Unit": "iMC" + }, + { + "BriefDescription": "DRAM Underfill Read CAS Commands issued", + "Counter": "0,1,2,3", + "EventCode": "0x4", + "EventName": "UNC_M_CAS_COUNT.RD_UNDERFILL", + "PerPkg": "1", + "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request). ", + "UMask": "0x2", + "Unit": "iMC" + }, + { + "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", + "Counter": "0,1,2,3", + "EventCode": "0x4", + "EventName": "LLC_MISSES.MEM_WRITE", + "PerPkg": "1", + "ScaleUnit": "64Bytes", + "UMask": "0xC", + "Unit": "iMC" + }, + { + "BriefDescription": "Read Pending Queue Allocations", + "Counter": "0,1,2,3", + "EventCode": "0x10", + "EventName": "UNC_M_RPQ_INSERTS", + "PerPkg": "1", + "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ. ", + "Unit": "iMC" + }, + { + "BriefDescription": "Read Pending Queue Occupancy", + "Counter": "0,1,2,3", + "EventCode": "0x80", + "EventName": "UNC_M_RPQ_OCCUPANCY", + "PerPkg": "1", + "PublicDescription": "Counts the number of entries in the Read Pending Queue (RPQ) at each cycle. This can then be used to calculate both the average occupancy of the queue (in conjunction with the number of cycles not empty) and the average latency in the queue (in conjunction with the number of allocations). The RPQ is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. They deallocate from the RPQ after the CAS command has been issued to memory.", + "Unit": "iMC" + }, + { + "BriefDescription": "Write Pending Queue Allocations", + "Counter": "0,1,2,3", + "EventCode": "0x20", + "EventName": "UNC_M_WPQ_INSERTS", + "PerPkg": "1", + "PublicDescription": "Counts the number of writes requests allocated into the Write Pending Queue (WPQ). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (Memory Controller). The write requests deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC.", + "Unit": "iMC" + }, + { + "BriefDescription": "Write Pending Queue Occupancy", + "Counter": "0,1,2,3", + "EventCode": "0x81", + "EventName": "UNC_M_WPQ_OCCUPANCY", + "PerPkg": "1", + "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests.", + "Unit": "iMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json new file mode 100644 index 000000000000..de6e70e552e2 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json @@ -0,0 +1,1156 @@ +[ + { + "BriefDescription": "Uncore cache clock ticks", + "Counter": "0,1,2,3", + "EventName": "UNC_CHA_CLOCKTICKS", + "PerPkg": "1", + "Unit": "CHA" + }, + { + "BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_cha_tor_inserts.ia_miss", + "Counter": "0,1,2,3", + "EventCode": "0x35", + "EventName": "LLC_MISSES.UNCACHEABLE", + "Filter": "config1=0x40e33", + "PerPkg": "1", + "UMask": "0x21", + "Unit": "CHA" + }, + { + "BriefDescription": "MMIO reads. Derived from unc_cha_tor_inserts.ia_miss", + "Counter": "0,1,2,3", + "EventCode": "0x35", + "EventName": "LLC_MISSES.MMIO_READ", + "Filter": "config1=0x40040e33", + "PerPkg": "1", + "UMask": "0x21", + "Unit": "CHA" + }, + { + "BriefDescription": "MMIO writes. Derived from unc_cha_tor_inserts.ia_miss", + "Counter": "0,1,2,3", + "EventCode": "0x35", + "EventName": "LLC_MISSES.MMIO_WRITE", + "Filter": "config1=0x40041e33", + "PerPkg": "1", + "UMask": "0x21", + "Unit": "CHA" + }, + { + "BriefDescription": "Streaming stores (full cache line). Derived from unc_cha_tor_inserts.ia_miss", + "Counter": "0,1,2,3", + "EventCode": "0x35", + "EventName": "LLC_REFERENCES.STREAMING_FULL", + "Filter": "config1=0x41833", + "PerPkg": "1", + "ScaleUnit": "64Bytes", + "UMask": "0x21", + "Unit": "CHA" + }, + { + "BriefDescription": "Streaming stores (partial cache line). Derived from unc_cha_tor_inserts.ia_miss", + "Counter": "0,1,2,3", + "EventCode": "0x35", + "EventName": "LLC_REFERENCES.STREAMING_PARTIAL", + "Filter": "config1=0x41a33", + "PerPkg": "1", + "ScaleUnit": "64Bytes", + "UMask": "0x21", + "Unit": "CHA" + }, + { + "BriefDescription": "read requests from home agent", + "Counter": "0,1,2,3", + "EventCode": "0x50", + "EventName": "UNC_CHA_REQUESTS.READS", + "PerPkg": "1", + "UMask": "0x03", + "Unit": "CHA" + }, + { + "BriefDescription": "read requests from local home agent", + "Counter": "0,1,2,3", + "EventCode": "0x50", + "EventName": "UNC_CHA_REQUESTS.READS_LOCAL", + "PerPkg": "1", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "read requests from remote home agent", + "Counter": "0,1,2,3", + "EventCode": "0x50", + "EventName": "UNC_CHA_REQUESTS.READS_REMOTE", + "PerPkg": "1", + "UMask": "0x02", + "Unit": "CHA" + }, + { + "BriefDescription": "write requests from home agent", + "Counter": "0,1,2,3", + "EventCode": "0x50", + "EventName": "UNC_CHA_REQUESTS.WRITES", + "PerPkg": "1", + "UMask": "0x0C", + "Unit": "CHA" + }, + { + "BriefDescription": "write requests from local home agent", + "Counter": "0,1,2,3", + "EventCode": "0x50", + "EventName": "UNC_CHA_REQUESTS.WRITES_LOCAL", + "PerPkg": "1", + "UMask": "0x04", + "Unit": "CHA" + }, + { + "BriefDescription": "write requests from remote home agent", + "Counter": "0,1,2,3", + "EventCode": "0x50", + "EventName": "UNC_CHA_REQUESTS.WRITES_REMOTE", + "PerPkg": "1", + "UMask": "0x08", + "Unit": "CHA" + }, + { + "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UPI_DATA_BANDWIDTH_TX", + "PerPkg": "1", + "ScaleUnit": "7.11E-06Bytes", + "UMask": "0x0F", + "Unit": "UPI LL" + }, + { + "BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "LLC_MISSES.PCIE_READ", + "FCMask": "0x07", + "Filter": "ch_mask=0x1f", + "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", + "MetricName": "LLC_MISSES.PCIE_READ", + "PerPkg": "1", + "PortMask": "0x01", + "ScaleUnit": "4Bytes", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "LLC_MISSES.PCIE_WRITE", + "FCMask": "0x07", + "Filter": "ch_mask=0x1f", + "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", + "MetricName": "LLC_MISSES.PCIE_WRITE", + "PerPkg": "1", + "PortMask": "0x01", + "ScaleUnit": "4Bytes", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth writing at IIO, part 0", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0", + "FCMask": "0x07", + "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", + "MetricName": "LLC_MISSES.PCIE_WRITE", + "PerPkg": "1", + "PortMask": "0x01", + "ScaleUnit": "4Bytes", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth writing at IIO, part 1", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "ScaleUnit": "4Bytes", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth writing at IIO, part 2", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "ScaleUnit": "4Bytes", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth writing at IIO, part 3", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "ScaleUnit": "4Bytes", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth reading at IIO, part 0", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0", + "FCMask": "0x07", + "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", + "MetricName": "LLC_MISSES.PCIE_READ", + "PerPkg": "1", + "PortMask": "0x01", + "ScaleUnit": "4Bytes", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth reading at IIO, part 1", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "ScaleUnit": "4Bytes", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth reading at IIO, part 2", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "ScaleUnit": "4Bytes", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth reading at IIO, part 3", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "ScaleUnit": "4Bytes", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Core Cross Snoops Issued; Multiple Core Requests", + "Counter": "0,1,2,3", + "EventCode": "0x33", + "EventName": "UNC_CHA_CORE_SNP.CORE_GTONE", + "PerPkg": "1", + "PublicDescription": "Counts the number of transactions that trigger a configurable number of cross snoops. Cores are snooped if the transaction looks up the cache and determines that it is necessary based on the operation type and what CoreValid bits are set. For example, if 2 CV bits are set on a data read, the cores must have the data in S state so it is not necessary to snoop them. However, if only 1 CV bit is set the core my have modified the data. If the transaction was an RFO, it would need to invalidate the lines. This event can be filtered based on who triggered the initial snoop(s).", + "UMask": "0x42", + "Unit": "CHA" + }, + { + "BriefDescription": "Core Cross Snoops Issued; Multiple Eviction", + "Counter": "0,1,2,3", + "EventCode": "0x33", + "EventName": "UNC_CHA_CORE_SNP.EVICT_GTONE", + "PerPkg": "1", + "PublicDescription": "Counts the number of transactions that trigger a configurable number of cross snoops. Cores are snooped if the transaction looks up the cache and determines that it is necessary based on the operation type and what CoreValid bits are set. For example, if 2 CV bits are set on a data read, the cores must have the data in S state so it is not necessary to snoop them. However, if only 1 CV bit is set the core my have modified the data. If the transaction was an RFO, it would need to invalidate the lines. This event can be filtered based on who triggered the initial snoop(s).", + "UMask": "0x82", + "Unit": "CHA" + }, + { + "BriefDescription": "Multi-socket cacheline Directory state lookups; Snoop Not Needed", + "Counter": "0,1,2,3", + "EventCode": "0x53", + "EventName": "UNC_CHA_DIR_LOOKUP.NO_SNP", + "PerPkg": "1", + "PublicDescription": "Counts transactions that looked into the multi-socket cacheline Directory state, and therefore did not send a snoop because the Directory indicated it was not needed", + "UMask": "0x02", + "Unit": "CHA" + }, + { + "BriefDescription": "Multi-socket cacheline Directory state lookups; Snoop Needed", + "Counter": "0,1,2,3", + "EventCode": "0x53", + "EventName": "UNC_CHA_DIR_LOOKUP.SNP", + "PerPkg": "1", + "PublicDescription": "Counts transactions that looked into the multi-socket cacheline Directory state, and sent one or more snoops, because the Directory indicated it was needed", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Multi-socket cacheline Directory state updates; Directory Updated memory write from the HA pipe", + "Counter": "0,1,2,3", + "EventCode": "0x54", + "EventName": "UNC_CHA_DIR_UPDATE.HA", + "PerPkg": "1", + "PublicDescription": "Counts only multi-socket cacheline Directory state updates memory writes issued from the HA pipe. This does not include memory write requests which are for I (Invalid) or E (Exclusive) cachelines.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Multi-socket cacheline Directory state updates; Directory Updated memory write from TOR pipe", + "Counter": "0,1,2,3", + "EventCode": "0x54", + "EventName": "UNC_CHA_DIR_UPDATE.TOR", + "PerPkg": "1", + "PublicDescription": "Counts only multi-socket cacheline Directory state updates due to memory writes issued from the TOR pipe which are the result of remote transaction hitting the SF/LLC and returning data Core2Core. This does not include memory write requests which are for I (Invalid) or E (Exclusive) cachelines.", + "UMask": "0x02", + "Unit": "CHA" + }, + { + "BriefDescription": "Read request from a remote socket which hit in the HitMe Cache to a line In the E state", + "Counter": "0,1,2,3", + "EventCode": "0x5F", + "EventName": "UNC_CHA_HITME_HIT.EX_RDS", + "PerPkg": "1", + "PublicDescription": "Counts read requests from a remote socket which hit in the HitME cache (used to cache the multi-socket Directory state) to a line in the E(Exclusive) state. This includes the following read opcodes (RdCode, RdData, RdDataMigratory, RdCur, RdInv*, Inv*)", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Normal priority reads issued to the memory controller from the CHA", + "Counter": "0,1,2,3", + "EventCode": "0x59", + "EventName": "UNC_CHA_IMC_READS_COUNT.NORMAL", + "PerPkg": "1", + "PublicDescription": "Counts when a normal (Non-Isochronous) read is issued to any of the memory controller channels from the CHA.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "CHA to iMC Full Line Writes Issued; Full Line Non-ISOCH", + "Counter": "0,1,2,3", + "EventCode": "0x5B", + "EventName": "UNC_CHA_IMC_WRITES_COUNT.FULL", + "PerPkg": "1", + "PublicDescription": "Counts when a normal (Non-Isochronous) full line write is issued from the CHA to the any of the memory controller channels.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Number of times that an RFO hit in S state.", + "Counter": "0,1,2,3", + "EventCode": "0x39", + "EventName": "UNC_CHA_MISC.RFO_HIT_S", + "PerPkg": "1", + "PublicDescription": "Counts when a RFO (the Read for Ownership issued before a write) request hit a cacheline in the S (Shared) state.", + "UMask": "0x08", + "Unit": "CHA" + }, + { + "BriefDescription": "Local requests for exclusive ownership of a cache line without receiving data", + "Counter": "0,1,2,3", + "EventCode": "0x50", + "EventName": "UNC_CHA_REQUESTS.INVITOE_LOCAL", + "PerPkg": "1", + "PublicDescription": "Counts the total number of requests coming from a unit on this socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.", + "UMask": "0x10", + "Unit": "CHA" + }, + { + "BriefDescription": "Local requests for exclusive ownership of a cache line without receiving data", + "Counter": "0,1,2,3", + "EventCode": "0x50", + "EventName": "UNC_CHA_REQUESTS.INVITOE_REMOTE", + "PerPkg": "1", + "PublicDescription": "Counts the total number of requests coming from a remote socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.", + "UMask": "0x20", + "Unit": "CHA" + }, + { + "BriefDescription": "RspCnflct* Snoop Responses Received", + "Counter": "0,1,2,3", + "EventCode": "0x5C", + "EventName": "UNC_CHA_SNOOP_RESP.RSPCNFLCTS", + "PerPkg": "1", + "PublicDescription": "Counts when a a transaction with the opcode type RspCnflct* Snoop Response was received. This is returned when a snoop finds an existing outstanding transaction in a remote caching agent. This triggers conflict resolution hardware. This covers both the opcode RspCnflct and RspCnflctWbI.", + "UMask": "0x40", + "Unit": "CHA" + }, + { + "BriefDescription": "RspI Snoop Responses Received", + "Counter": "0,1,2,3", + "EventCode": "0x5C", + "EventName": "UNC_CHA_SNOOP_RESP.RSPI", + "PerPkg": "1", + "PublicDescription": "Counts when a transaction with the opcode type RspI Snoop Response was received which indicates the remote cache does not have the data, or when the remote cache silently evicts data (such as when an RFO: the Read for Ownership issued before a write hits non-modified data).", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "RspIFwd Snoop Responses Received", + "Counter": "0,1,2,3", + "EventCode": "0x5C", + "EventName": "UNC_CHA_SNOOP_RESP.RSPIFWD", + "PerPkg": "1", + "PublicDescription": "Counts when a a transaction with the opcode type RspIFwd Snoop Response was received which indicates a remote caching agent forwarded the data and the requesting agent is able to acquire the data in E (Exclusive) or M (modified) states. This is commonly returned with RFO (the Read for Ownership issued before a write) transactions. The snoop could have either been to a cacheline in the M,E,F (Modified, Exclusive or Forward) states.", + "UMask": "0x04", + "Unit": "CHA" + }, + { + "BriefDescription": "RspSFwd Snoop Responses Received", + "Counter": "0,1,2,3", + "EventCode": "0x5C", + "EventName": "UNC_CHA_SNOOP_RESP.RSPSFWD", + "PerPkg": "1", + "PublicDescription": "Counts when a a transaction with the opcode type RspSFwd Snoop Response was received which indicates a remote caching agent forwarded the data but held on to its current copy. This is common for data and code reads that hit in a remote socket in E (Exclusive) or F (Forward) state.", + "UMask": "0x08", + "Unit": "CHA" + }, + { + "BriefDescription": "Rsp*Fwd*WB Snoop Responses Received", + "Counter": "0,1,2,3", + "EventCode": "0x5C", + "EventName": "UNC_CHA_SNOOP_RESP.RSP_FWD_WB", + "PerPkg": "1", + "PublicDescription": "Counts when a transaction with the opcode type Rsp*Fwd*WB Snoop Response was received which indicates the data was written back to it's home socket, and the cacheline was forwarded to the requestor socket. This snoop response is only used in >= 4 socket systems. It is used when a snoop HITM's in a remote caching agent and it directly forwards data to a requestor, and simultaneously returns data to it's home socket to be written back to memory.", + "UMask": "0x20", + "Unit": "CHA" + }, + { + "BriefDescription": "Rsp*WB Snoop Responses Received", + "Counter": "0,1,2,3", + "EventCode": "0x5C", + "EventName": "UNC_CHA_SNOOP_RESP.RSP_WBWB", + "PerPkg": "1", + "PublicDescription": "Counts when a transaction with the opcode type Rsp*WB Snoop Response was received which indicates which indicates the data was written back to it's home. This is returned when a non-RFO request hits a cacheline in the Modified state. The Cache can either downgrade the cacheline to a S (Shared) or I (Invalid) state depending on how the system has been configured. This reponse will also be sent when a cache requests E (Exclusive) ownership of a cache line without receiving data, because the cache must acquire ownership.", + "UMask": "0x10", + "Unit": "CHA" + }, + { + "BriefDescription": "Clockticks of the IIO Traffic Controller", + "Counter": "0,1,2,3", + "EventCode": "0x1", + "EventName": "UNC_IIO_CLOCKTICKS", + "PerPkg": "1", + "PublicDescription": "Counts clockticks of the 1GHz trafiic controller clock in the IIO unit.", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part0", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part1", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part2", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part3", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of 4 bytes made to IIO Part0 by the CPU", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of 4 bytes made to IIO Part1 by the CPU", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU ", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU ", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part0", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part1", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part2", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part3", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part0 by the CPU", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part1 by the CPU", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU ", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU ", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part0 to Memory", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part0 to a unit on the main die (generally memory). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part1 to Memory", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part1 to a unit on the main die (generally memory). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part2 to Memory", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part2 to a unit on the main die (generally memory). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part3 to Memory", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part3 to a unit on the main die (generally memory). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part0 to Memory", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part0 to a unit on the main die (generally memory). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part1 to Memory", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part1 to a unit on the main die (generally memory). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part2 to Memory", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part2 to a unit on the main die (generally memory). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part3 to Memory", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part3 to a unit on the main die (generally memory). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x01", + "Unit": "IIO" + }, + { + "BriefDescription": "Traffic in which the M2M to iMC Bypass was not taken", + "Counter": "0,1,2,3", + "EventCode": "0x22", + "EventName": "UNC_M2M_BYPASS_M2M_Egress.NOT_TAKEN", + "PerPkg": "1", + "PublicDescription": "Counts traffic in which the M2M (Mesh to Memory) to iMC (Memory Controller) bypass was not taken", + "UMask": "0x2", + "Unit": "M2M" + }, + { + "BriefDescription": "Cycles when direct to core mode (which bypasses the CHA) was disabled", + "Counter": "0,1,2,3", + "EventCode": "0x24", + "EventName": "UNC_M2M_DIRECT2CORE_NOT_TAKEN_DIRSTATE", + "PerPkg": "1", + "PublicDescription": "Counts cycles when direct to core mode (which bypasses the CHA) was disabled", + "Unit": "M2M" + }, + { + "BriefDescription": "Messages sent direct to core (bypassing the CHA)", + "Counter": "0,1,2,3", + "EventCode": "0x23", + "EventName": "UNC_M2M_DIRECT2CORE_TAKEN", + "PerPkg": "1", + "PublicDescription": "Counts when messages were sent direct to core (bypassing the CHA)", + "Unit": "M2M" + }, + { + "BriefDescription": "Number of reads in which direct to core transaction were overridden", + "Counter": "0,1,2,3", + "EventCode": "0x25", + "EventName": "UNC_M2M_DIRECT2CORE_TXN_OVERRIDE", + "PerPkg": "1", + "PublicDescription": "Counts reads in which direct to core transactions (which would have bypassed the CHA) were overridden", + "Unit": "M2M" + }, + { + "BriefDescription": "Number of reads in which direct to Intel UPI transactions were overridden", + "Counter": "0,1,2,3", + "EventCode": "0x28", + "EventName": "UNC_M2M_DIRECT2UPI_NOT_TAKEN_CREDITS", + "PerPkg": "1", + "PublicDescription": "Counts reads in which direct to Intel Ultra Path Interconnect (UPI) transactions (which would have bypassed the CHA) were overridden", + "Unit": "M2M" + }, + { + "BriefDescription": "Cycles when direct to Intel UPI was disabled", + "Counter": "0,1,2,3", + "EventCode": "0x27", + "EventName": "UNC_M2M_DIRECT2UPI_NOT_TAKEN_DIRSTATE", + "PerPkg": "1", + "PublicDescription": "Counts cycles when the ability to send messages direct to the Intel Ultra Path Interconnect (bypassing the CHA) was disabled", + "Unit": "M2M" + }, + { + "BriefDescription": "Messages sent direct to the Intel UPI", + "Counter": "0,1,2,3", + "EventCode": "0x26", + "EventName": "UNC_M2M_DIRECT2UPI_TAKEN", + "PerPkg": "1", + "PublicDescription": "Counts when messages were sent direct to the Intel Ultra Path Interconnect (bypassing the CHA)", + "Unit": "M2M" + }, + { + "BriefDescription": "Number of reads that a message sent direct2 Intel UPI was overridden", + "Counter": "0,1,2,3", + "EventCode": "0x29", + "EventName": "UNC_M2M_DIRECT2UPI_TXN_OVERRIDE", + "PerPkg": "1", + "PublicDescription": "Counts when a read message that was sent direct to the Intel Ultra Path Interconnect (bypassing the CHA) was overridden", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory lookups (any state found)", + "Counter": "0,1,2,3", + "EventCode": "0x2D", + "EventName": "UNC_M2M_DIRECTORY_LOOKUP.ANY", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state, and found the cacheline marked in Any State (A, I, S or unused)", + "UMask": "0x1", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state) ", + "Counter": "0,1,2,3", + "EventCode": "0x2D", + "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_A", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state, and found the cacheline marked in the A (SnoopAll) state, indicating the cacheline is stored in another socket in any state, and we must snoop the other sockets to make sure we get the latest data. The data may be stored in any state in the local socket.", + "UMask": "0x8", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state) ", + "Counter": "0,1,2,3", + "EventCode": "0x2D", + "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_I", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state , and found the cacheline marked in the I (Invalid) state indicating the cacheline is not stored in another socket, and so there is no need to snoop the other sockets for the latest data. The data may be stored in any state in the local socket.", + "UMask": "0x2", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state) ", + "Counter": "0,1,2,3", + "EventCode": "0x2D", + "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_S", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state , and found the cacheline marked in the S (Shared) state indicating the cacheline is either stored in another socket in the S(hared) state , and so there is no need to snoop the other sockets for the latest data. The data may be stored in any state in the local socket.", + "UMask": "0x4", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory update from A to I", + "Counter": "0,1,2,3", + "EventCode": "0x2E", + "EventName": "UNC_M2M_DIRECTORY_UPDATE.A2I", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from A (SnoopAll) to I (Invalid)", + "UMask": "0x20", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory update from A to S", + "Counter": "0,1,2,3", + "EventCode": "0x2E", + "EventName": "UNC_M2M_DIRECTORY_UPDATE.A2S", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from A (SnoopAll) to S (Shared)", + "UMask": "0x40", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory update from/to Any state ", + "Counter": "0,1,2,3", + "EventCode": "0x2E", + "EventName": "UNC_M2M_DIRECTORY_UPDATE.ANY", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory to a new state", + "UMask": "0x1", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory update from I to A", + "Counter": "0,1,2,3", + "EventCode": "0x2E", + "EventName": "UNC_M2M_DIRECTORY_UPDATE.I2A", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from I (Invalid) to A (SnoopAll)", + "UMask": "0x4", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory update from I to S", + "Counter": "0,1,2,3", + "EventCode": "0x2E", + "EventName": "UNC_M2M_DIRECTORY_UPDATE.I2S", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from I (Invalid) to S (Shared)", + "UMask": "0x2", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory update from S to A", + "Counter": "0,1,2,3", + "EventCode": "0x2E", + "EventName": "UNC_M2M_DIRECTORY_UPDATE.S2A", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from S (Shared) to A (SnoopAll)", + "UMask": "0x10", + "Unit": "M2M" + }, + { + "BriefDescription": "Multi-socket cacheline Directory update from S to I", + "Counter": "0,1,2,3", + "EventCode": "0x2E", + "EventName": "UNC_M2M_DIRECTORY_UPDATE.S2I", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from S (Shared) to I (Invalid)", + "UMask": "0x8", + "Unit": "M2M" + }, + { + "BriefDescription": "Reads to iMC issued", + "Counter": "0,1,2,3", + "EventCode": "0x37", + "EventName": "UNC_M2M_IMC_READS.ALL", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller). ", + "UMask": "0x4", + "Unit": "M2M" + }, + { + "BriefDescription": "Reads to iMC issued at Normal Priority (Non-Isochronous)", + "Counter": "0,1,2,3", + "EventCode": "0x37", + "EventName": "UNC_M2M_IMC_READS.NORMAL", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller). It only counts normal priority non-isochronous reads.", + "UMask": "0x1", + "Unit": "M2M" + }, + { + "BriefDescription": "Writes to iMC issued", + "Counter": "0,1,2,3", + "EventCode": "0x38", + "EventName": "UNC_M2M_IMC_WRITES.ALL", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) issues writes to the iMC (Memory Controller).", + "UMask": "0x10", + "Unit": "M2M" + }, + { + "BriefDescription": "Partial Non-Isochronous writes to the iMC", + "Counter": "0,1,2,3", + "EventCode": "0x38", + "EventName": "UNC_M2M_IMC_WRITES.PARTIAL", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) issues partial writes to the iMC (Memory Controller). It only counts normal priority non-isochronous writes.", + "UMask": "0x2", + "Unit": "M2M" + }, + { + "BriefDescription": "Prefecth requests that got turn into a demand request", + "Counter": "0,1,2,3", + "EventCode": "0x56", + "EventName": "UNC_M2M_PREFCAM_DEMAND_PROMOTIONS", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) promotes a outstanding request in the prefetch queue due to a subsequent demand read request that entered the M2M with the same address. Explanatory Side Note: The Prefecth queue is made of CAM (Content Addressable Memory)", + "Unit": "M2M" + }, + { + "BriefDescription": "Inserts into the Memory Controller Prefetch Queue", + "Counter": "0,1,2,3", + "EventCode": "0x57", + "EventName": "UNC_M2M_PREFCAM_INSERTS", + "PerPkg": "1", + "PublicDescription": "Counts when the M2M (Mesh to Memory) recieves a prefetch request and inserts it into its outstanding prefetch queue. Explanatory Side Note: the prefect queue is made from CAM: Content Addressable Memory", + "Unit": "M2M" + }, + { + "BriefDescription": "AD Ingress (from CMS) Queue Inserts", + "Counter": "0,1,2,3", + "EventCode": "0x1", + "EventName": "UNC_M2M_RxC_AD_INSERTS", + "PerPkg": "1", + "PublicDescription": "Counts when the a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop). This is generally used for reads, and ", + "Unit": "M2M" + }, + { + "BriefDescription": "Prefetches generated by the flow control queue of the M3UPI unit.", + "Counter": "0,1,2,3", + "EventCode": "0x29", + "EventName": "UNC_M3UPI_UPI_PREFETCH_SPAWN", + "PerPkg": "1", + "PublicDescription": "Count cases where flow control queue that sits between the Intel Ultra Path Interconnect (UPI) and the mesh spawns a prefetch to the iMC (Memory Controller)", + "Unit": "M3UPI" + }, + { + "BriefDescription": "Clocks of the Intel Ultra Path Interconnect (UPI)", + "Counter": "0,1,2,3", + "EventCode": "0x1", + "EventName": "UNC_UPI_CLOCKTICKS", + "PerPkg": "1", + "PublicDescription": "Counts clockticks of the fixed frequency clock controlling the Intel Ultra Path Interconnect (UPI). This clock runs at1/8th the 'GT/s' speed of the UPI link. For example, a 9.6GT/s link will have a fixed Frequency of 1.2 Ghz.", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Data Response packets that go direct to core", + "Counter": "0,1,2,3", + "EventCode": "0x12", + "EventName": "UNC_UPI_DIRECT_ATTEMPTS.D2C", + "PerPkg": "1", + "PublicDescription": "Counts Data Response (DRS) packets that attempted to go direct to core bypassing the CHA.", + "UMask": "0x1", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Data Response packets that go direct to Intel UPI", + "Counter": "0,1,2,3", + "EventCode": "0x12", + "EventName": "UNC_UPI_DIRECT_ATTEMPTS.D2U", + "PerPkg": "1", + "PublicDescription": "Counts Data Response (DRS) packets that attempted to go direct to Intel Ultra Path Interconnect (UPI) bypassing the CHA .", + "UMask": "0x2", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Cycles Intel UPI is in L1 power mode (shutdown)", + "Counter": "0,1,2,3", + "EventCode": "0x21", + "EventName": "UNC_UPI_L1_POWER_CYCLES", + "PerPkg": "1", + "PublicDescription": "Counts cycles when the Intel Ultra Path Interconnect (UPI) is in L1 power mode. L1 is a mode that totally shuts down the UPI link. Link power states are per link and per direction, so for example the Tx direction could be in one state while Rx was in another, this event only coutns when both links are shutdown.", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Cycles the Rx of the Intel UPI is in L0p power mode", + "Counter": "0,1,2,3", + "EventCode": "0x25", + "EventName": "UNC_UPI_RxL0P_POWER_CYCLES", + "PerPkg": "1", + "PublicDescription": "Counts cycles when the the receive side (Rx) of the Intel Ultra Path Interconnect(UPI) is in L0p power mode. L0p is a mode where we disable 60% of the UPI lanes, decreasing our bandwidth in order to save power.", + "Unit": "UPI LL" + }, + { + "BriefDescription": "FLITs received which bypassed the Slot0 Receive Buffer", + "Counter": "0,1,2,3", + "EventCode": "0x31", + "EventName": "UNC_UPI_RxL_BYPASSED.SLOT0", + "PerPkg": "1", + "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot0 RxQ buffer (Receive Queue) and passed directly to the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.", + "UMask": "0x1", + "Unit": "UPI LL" + }, + { + "BriefDescription": "FLITs received which bypassed the Slot0 Receive Buffer", + "Counter": "0,1,2,3", + "EventCode": "0x31", + "EventName": "UNC_UPI_RxL_BYPASSED.SLOT1", + "PerPkg": "1", + "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot1 RxQ buffer (Receive Queue) and passed directly across the BGF and into the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.", + "UMask": "0x2", + "Unit": "UPI LL" + }, + { + "BriefDescription": "FLITs received which bypassed the Slot0 Recieve Buffer", + "Counter": "0,1,2,3", + "EventCode": "0x31", + "EventName": "UNC_UPI_RxL_BYPASSED.SLOT2", + "PerPkg": "1", + "PublicDescription": "Counts incoming FLITs (FLow control unITs) whcih bypassed the slot2 RxQ buffer (Receive Queue) and passed directly to the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.", + "UMask": "0x4", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Valid data FLITs received from any slot", + "Counter": "0,1,2,3", + "EventCode": "0x3", + "EventName": "UNC_UPI_RxL_FLITS.ALL_DATA", + "PerPkg": "1", + "PublicDescription": "Counts valid data FLITs (80 bit FLow control unITs: 64bits of data) received from any of the 3 Intel Ultra Path Interconnect (UPI) Receive Queue slots on this UPI unit.", + "UMask": "0x0F", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Null FLITs received from any slot", + "Counter": "0,1,2,3", + "EventCode": "0x3", + "EventName": "UNC_UPI_RxL_FLITS.ALL_NULL", + "PerPkg": "1", + "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) received from any of the 3 Intel Ultra Path Interconnect (UPI) Receive Queue slots on this UPI unit.", + "UMask": "0x27", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Protocol header and credit FLITs received from any slot", + "Counter": "0,1,2,3", + "EventCode": "0x3", + "EventName": "UNC_UPI_RxL_FLITS.NON_DATA", + "PerPkg": "1", + "PublicDescription": "Counts protocol header and credit FLITs (80 bit FLow control unITs) received from any of the 3 UPI slots on this UPI unit.", + "UMask": "0x97", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Cycles in which the Tx of the Intel Ultra Path Interconnect (UPI) is in L0p power mode", + "Counter": "0,1,2,3", + "EventCode": "0x27", + "EventName": "UNC_UPI_TxL0P_POWER_CYCLES", + "PerPkg": "1", + "PublicDescription": "Counts cycles when the transmit side (Tx) of the Intel Ultra Path Interconnect(UPI) is in L0p power mode. L0p is a mode where we disable 60% of the UPI lanes, decreasing our bandwidth in order to save power.", + "Unit": "UPI LL" + }, + { + "BriefDescription": "FLITs that bypassed the TxL Buffer", + "Counter": "0,1,2,3", + "EventCode": "0x41", + "EventName": "UNC_UPI_TxL_BYPASSED", + "PerPkg": "1", + "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the TxL(transmit) FLIT buffer and pass directly out the UPI Link. Generally, when data is transmitted across the Intel Ultra Path Interconnect (UPI), it will bypass the TxQ and pass directly to the link. However, the TxQ will be used in L0p (Low Power) mode and (Link Layer Retry) LLR mode, increasing latency to transfer out to the link.", + "Unit": "UPI LL" + }, + { + "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UPI_DATA_BANDWIDTH_TX", + "PerPkg": "1", + "ScaleUnit": "7.11E-06Bytes", + "UMask": "0x0F", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Null FLITs transmitted from any slot", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL", + "PerPkg": "1", + "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) transmitted via any of the 3 Intel Ulra Path Interconnect (UPI) slots on this UPI unit.", + "UMask": "0x27", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Idle FLITs transmitted", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UNC_UPI_TxL_FLITS.IDLE", + "PerPkg": "1", + "PublicDescription": "Counts when the Intel Ultra Path Interconnect(UPI) transmits an idle FLIT(80 bit FLow control unITs). Every UPI cycle must be sending either data FLITs, protocol/credit FLITs or idle FLITs.", + "UMask": "0x47", + "Unit": "UPI LL" + }, + { + "BriefDescription": "Protocol header and credit FLITs transmitted across any slot", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UNC_UPI_TxL_FLITS.NON_DATA", + "PerPkg": "1", + "PublicDescription": "Counts protocol header and credit FLITs (80 bit FLow control unITs) transmitted across any of the 3 UPI (Ultra Path Interconnect) slots on this UPI unit.", + "UMask": "0x97", + "Unit": "UPI LL" + } +] From 52839e653b5629bd237ad2ecdd8fdd897fcd5712 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Aug 2017 15:21:55 -0700 Subject: [PATCH 219/253] perf tools: Add support for printing new mem_info encodings Add decoding for the new "lvlx" and "snoopx" meminfo fields added earlier to the kernel so that "perf mem report" and other tools can print it properly. v2: Merge with persistent memory patch. Switch to new bit encoding for each combination. v3: Switch to generic lvlnum field. Signed-off-by: Andi Kleen Acked-by: Peter Zijlstra Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170816222156.19953-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 30 +++++++++++++++++-- tools/perf/util/mem-events.c | 43 +++++++++++++++++++++++++-- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 642db5fa3286..2a37ae925d85 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -954,14 +954,20 @@ union perf_mem_data_src { mem_snoop:5, /* snoop mode */ mem_lock:2, /* lock instr */ mem_dtlb:7, /* tlb access */ - mem_rsvd:31; + mem_lvl_num:4, /* memory hierarchy level number */ + mem_remote:1, /* remote */ + mem_snoopx:2, /* snoop mode, ext */ + mem_rsvd:24; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:31, + __u64 mem_rsvd:24, + mem_snoopx:2, /* snoop mode, ext */ + mem_remote:1, /* remote */ + mem_lvl_num:4, /* memory hierarchy level number */ mem_dtlb:7, /* tlb access */ mem_lock:2, /* lock instr */ mem_snoop:5, /* snoop mode */ @@ -998,6 +1004,22 @@ union perf_mem_data_src { #define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ #define PERF_MEM_LVL_SHIFT 5 +#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ +/* 5-0xa available */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + /* snoop mode */ #define PERF_MEM_SNOOP_NA 0x01 /* not available */ #define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ @@ -1006,6 +1028,10 @@ union perf_mem_data_src { #define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ #define PERF_MEM_SNOOP_SHIFT 19 +#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ +/* 1 free */ +#define PERF_MEM_SNOOPX_SHIFT 37 + /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ #define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 06f5a3a4295c..ced4f3fff035 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -166,11 +166,20 @@ static const char * const mem_lvl[] = { "Uncached", }; +static const char * const mem_lvlnum[] = { + [PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache", + [PERF_MEM_LVLNUM_LFB] = "LFB", + [PERF_MEM_LVLNUM_RAM] = "RAM", + [PERF_MEM_LVLNUM_PMEM] = "PMEM", + [PERF_MEM_LVLNUM_NA] = "N/A", +}; + int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { size_t i, l = 0; u64 m = PERF_MEM_LVL_NA; u64 hit, miss; + int printed; if (mem_info) m = mem_info->data_src.mem_lvl; @@ -184,17 +193,37 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) /* already taken care of */ m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); + + if (mem_info && mem_info->data_src.mem_remote) { + strcat(out, "Remote "); + l += 7; + } + + printed = 0; for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { if (!(m & 0x1)) continue; - if (l) { + if (printed++) { strcat(out, " or "); l += 4; } l += scnprintf(out + l, sz - l, mem_lvl[i]); } - if (*out == '\0') - l += scnprintf(out, sz - l, "N/A"); + + if (mem_info && mem_info->data_src.mem_lvl_num) { + int lvl = mem_info->data_src.mem_lvl_num; + if (printed++) { + strcat(out, " or "); + l += 4; + } + if (mem_lvlnum[lvl]) + l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]); + else + l += scnprintf(out + l, sz - l, "L%d", lvl); + } + + if (l == 0) + l += scnprintf(out + l, sz - l, "N/A"); if (hit) l += scnprintf(out + l, sz - l, " hit"); if (miss) @@ -231,6 +260,14 @@ int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) } l += scnprintf(out + l, sz - l, snoop_access[i]); } + if (mem_info && + (mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) { + if (l) { + strcat(out, " or "); + l += 4; + } + l += scnprintf(out + l, sz - l, "Fwd"); + } if (*out == '\0') l += scnprintf(out, sz - l, "N/A"); From 3067eaa7ce2dbcde89d87277cdbc91c211480060 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Aug 2017 15:21:56 -0700 Subject: [PATCH 220/253] perf test: Add test cases for new data source encoding Add some simple tests to perf test to test data source printing. v2: Make the tests actually checked for the correct name of Forward v3: Adjust to new encoding Committer notes: Avoid the in place declaration to make this build with older compilers, for instance, in Debian 7 we get: tests/mem.c: In function 'test__mem': tests/mem.c:30:5: error: missing initializer [-Werror=missing-field-initializers] tests/mem.c:30:5: error: (near initialization for '(anonymous)..mem_snoop') [-Werror=missing-field-initializers] So just zero a struct, then go on building the unions as needed, reusing settings from the previous test, i.e. local -> remote, etc. Signed-off-by: Andi Kleen Acked-by: Peter Zijlstra Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170816222156.19953-5-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 +++ tools/perf/tests/mem.c | 56 +++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 4 files changed, 62 insertions(+) create mode 100644 tools/perf/tests/mem.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 84222bdb8689..87bf3edb037c 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -34,6 +34,7 @@ perf-y += thread-map.o perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o perf-y += bpf.o perf-y += topology.o +perf-y += mem.o perf-y += cpumap.o perf-y += stat.o perf-y += event_update.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 9ecc44e68990..377bea009163 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -47,6 +47,10 @@ static struct test generic_tests[] = { .desc = "Read samples using the mmap interface", .func = test__basic_mmap, }, + { + .desc = "Test data source output", + .func = test__mem, + }, { .desc = "Parse event definition strings", .func = test__parse_events, diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c new file mode 100644 index 000000000000..21952e1e6e6d --- /dev/null +++ b/tools/perf/tests/mem.c @@ -0,0 +1,56 @@ +#include "util/mem-events.h" +#include "util/symbol.h" +#include "linux/perf_event.h" +#include "util/debug.h" +#include "tests.h" +#include + +static int check(union perf_mem_data_src data_src, + const char *string) +{ + char out[100]; + char failure[100]; + struct mem_info mi = { .data_src = data_src }; + + int n; + + n = perf_mem__snp_scnprintf(out, sizeof out, &mi); + n += perf_mem__lvl_scnprintf(out + n, sizeof out - n, &mi); + snprintf(failure, sizeof failure, "unexpected %s", out); + TEST_ASSERT_VAL(failure, !strcmp(string, out)); + return 0; +} + +int test__mem(struct test *text __maybe_unused, int subtest __maybe_unused) +{ + int ret = 0; + union perf_mem_data_src src; + + memset(&src, 0, sizeof(src)); + + src.mem_lvl = PERF_MEM_LVL_HIT; + src.mem_lvl_num = 4; + + ret |= check(src, "N/AL4 hit"); + + src.mem_remote = 1; + + ret |= check(src, "N/ARemote L4 hit"); + + src.mem_lvl = PERF_MEM_LVL_MISS; + src.mem_lvl_num = PERF_MEM_LVLNUM_PMEM; + src.mem_remote = 0; + + ret |= check(src, "N/APMEM miss"); + + src.mem_remote = 1; + + ret |= check(src, "N/ARemote PMEM miss"); + + src.mem_snoopx = PERF_MEM_SNOOPX_FWD; + src.mem_lvl_num = PERF_MEM_LVLNUM_RAM; + + ret |= check(src , "FwdRemote RAM miss"); + + return ret; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index c46ae818aac8..921412a6a880 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -58,6 +58,7 @@ int test__python_use(struct test *test, int subtest); int test__bp_signal(struct test *test, int subtest); int test__bp_signal_overflow(struct test *test, int subtest); int test__task_exit(struct test *test, int subtest); +int test__mem(struct test *test, int subtest); int test__sw_clock_freq(struct test *test, int subtest); int test__code_reading(struct test *test, int subtest); int test__sample_parsing(struct test *test, int subtest); From 2826478a6660158d261bc49ad8954a8f5c39be07 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 20 Aug 2017 14:39:13 +0300 Subject: [PATCH 221/253] perf tools: Really install manpages via 'make install-man' Target install-man builds them but forget to install. Signed-off-by: Konstantin Khlebnikov Cc: Alexander Shishkin Cc: Borislav Petkov Cc: Peter Zijlstra Fixes: af3df2cf17f5 ("perf tools: Try to build Documentation when installing") Link: http://lkml.kernel.org/r/150322915300.129715.13645857235229756834.stgit@buzz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 098cfb9ca8f0..db11478e30b4 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -192,7 +192,7 @@ do-install-man: man # $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \ # $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir) -install-man: check-man-tools man +install-man: check-man-tools man do-install-man ifdef missing_tools DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) From ac0bb6b72f4bbab08f270a919406d971e73698b5 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 20 Aug 2017 14:39:20 +0300 Subject: [PATCH 222/253] perf: Fix documentation for sysctls perf_event_paranoid and perf_event_mlock_kb Fix misprint CAP_IOC_LOCK -> CAP_IPC_LOCK. This capability have nothing to do with raw tracepoints. This part is about bypassing mlock limits. Sysctl kernel.perf_event_paranoid = -1 allows raw and ftrace function tracepoints without CAP_SYS_ADMIN. Signed-off-by: Konstantin Khlebnikov Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/150322916080.129746.11285255474738558340.stgit@buzz Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/sysctl/kernel.txt | 13 ++++++++++++- tools/perf/util/evsel.c | 4 +++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index bac23c198360..ce61d1fe08ca 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -61,6 +61,7 @@ show up in /proc/sys/kernel: - perf_cpu_time_max_percent - perf_event_paranoid - perf_event_max_stack +- perf_event_mlock_kb - perf_event_max_contexts_per_stack - pid_max - powersave-nap [ PPC only ] @@ -654,7 +655,9 @@ Controls use of the performance events system by unprivileged users (without CAP_SYS_ADMIN). The default value is 2. -1: Allow use of (almost) all events by all users ->=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK + Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK +>=0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN + Disallow raw tracepoint access by users without CAP_SYS_ADMIN >=1: Disallow CPU event access by users without CAP_SYS_ADMIN >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN @@ -673,6 +676,14 @@ The default value is 127. ============================================================== +perf_event_mlock_kb: + +Control size of per-cpu ring buffer not counted agains mlock limit. + +The default value is 512 + 1 page + +============================================================== + perf_event_max_contexts_per_stack: Controls maximum number of stack frame context entries for diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5dfb8bc4db89..a5888c704e01 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2674,7 +2674,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "unprivileged users (without CAP_SYS_ADMIN).\n\n" "The current value is %d:\n\n" " -1: Allow use of (almost) all events by all users\n" - ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n" + " Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n" + ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n" + " Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n" ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n" "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n" From ba335df4ea2a5011002c5cde53bab6d7f5d714d8 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 20 Aug 2017 14:39:27 +0300 Subject: [PATCH 223/253] perf tools: Fix static linking with libdw from elfutils Fix feature test for static libdw: link required dependencies. Backends of libebl are not statically linked thus libdl is required. In Debian/Ubuntu libdw-dev includes libebl.a starting from 0.166-1. Signed-off-by: Konstantin Khlebnikov Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/150322916720.129772.7959925864494283854.stgit@buzz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 37d203c4cd1f..bb4735b92ada 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -103,8 +103,12 @@ ifdef LIBDW_DIR LIBDW_CFLAGS := -I$(LIBDW_DIR)/include LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib endif +DWARFLIBS := -ldw +ifeq ($(findstring -static,${LDFLAGS}),-static) + DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2 +endif FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) -FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw +FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS) # for linking with debug library, run like: # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ @@ -365,10 +369,6 @@ ifndef NO_LIBELF else CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) LDFLAGS += $(LIBDW_LDFLAGS) - DWARFLIBS := -ldw - ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 - endif EXTLIBS += ${DWARFLIBS} $(call detected,CONFIG_DWARF) endif # PERF_HAVE_DWARF_REGS From 60913e005c8d19ec5187a638eafdd088509dfb9e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 20 Aug 2017 14:39:32 +0300 Subject: [PATCH 224/253] perf tools: Fix static linking with libunwind * libunwind-x86_64 must be linked before libunwind * libunwind requires liblzma * static libunwind conflicts with static libgcc_eh Signed-off-by: Konstantin Khlebnikov Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/150322917247.129799.14247751517961953155.stgit@buzz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index bb4735b92ada..6a64c6bbd9a5 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -35,7 +35,7 @@ ifeq ($(SRCARCH),x86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S - LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 + LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma $(call detected,CONFIG_X86_64) else LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind @@ -505,6 +505,10 @@ ifndef NO_LOCAL_LIBUNWIND EXTLIBS += $(LIBUNWIND_LIBS) LDFLAGS += $(LIBUNWIND_LIBS) endif +ifeq ($(findstring -static,${LDFLAGS}),-static) + # gcc -static links libgcc_eh which contans piece of libunwind + LIBUNWIND_LDFLAGS += -Wl,--allow-multiple-definition +endif ifndef NO_LIBUNWIND CFLAGS += -DHAVE_LIBUNWIND_SUPPORT From 2ab346cfb0decf01523949e29f5cf542f2304611 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 16 Aug 2017 17:18:16 +0100 Subject: [PATCH 225/253] perf/aux: Make aux_{head,wakeup} ring_buffer members long The aux_head and aux_wakeup members of struct ring_buffer are defined using the local_t type, despite the fact that they are only accessed via the perf_aux_output_*() functions, which cannot race with each other for a given ring buffer. This patch changes the type of the members to long, so we can avoid using the local_*() API where it isn't needed. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1502900297-21839-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- kernel/events/internal.h | 4 ++-- kernel/events/ring_buffer.c | 31 ++++++++++++++----------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 486fd78eb8d5..2941b868353c 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -38,9 +38,9 @@ struct ring_buffer { struct user_struct *mmap_user; /* AUX area */ - local_t aux_head; + long aux_head; local_t aux_nest; - local_t aux_wakeup; + long aux_wakeup; unsigned long aux_pgoff; int aux_nr_pages; int aux_overwrite; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ee97196bb151..25437fda56e3 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -367,7 +367,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1))) goto err_put; - aux_head = local_read(&rb->aux_head); + aux_head = rb->aux_head; handle->rb = rb; handle->event = event; @@ -382,7 +382,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, */ if (!rb->aux_overwrite) { aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); - handle->wakeup = local_read(&rb->aux_wakeup) + rb->aux_watermark; + handle->wakeup = rb->aux_wakeup + rb->aux_watermark; if (aux_head - aux_tail < perf_aux_size(rb)) handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); @@ -433,12 +433,12 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE; aux_head = handle->head; - local_set(&rb->aux_head, aux_head); + rb->aux_head = aux_head; } else { handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE; - aux_head = local_read(&rb->aux_head); - local_add(size, &rb->aux_head); + aux_head = rb->aux_head; + rb->aux_head += size; } if (size || handle->aux_flags) { @@ -450,11 +450,10 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) handle->aux_flags); } - aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); - - if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { + rb->user_page->aux_head = rb->aux_head; + if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { wakeup = true; - local_add(rb->aux_watermark, &rb->aux_wakeup); + rb->aux_wakeup += rb->aux_watermark; } if (wakeup) { @@ -478,22 +477,20 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) { struct ring_buffer *rb = handle->rb; - unsigned long aux_head; if (size > handle->size) return -ENOSPC; - local_add(size, &rb->aux_head); + rb->aux_head += size; - aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); - if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { + rb->user_page->aux_head = rb->aux_head; + if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { perf_output_wakeup(handle); - local_add(rb->aux_watermark, &rb->aux_wakeup); - handle->wakeup = local_read(&rb->aux_wakeup) + - rb->aux_watermark; + rb->aux_wakeup += rb->aux_watermark; + handle->wakeup = rb->aux_wakeup + rb->aux_watermark; } - handle->head = aux_head; + handle->head = rb->aux_head; handle->size -= size; return 0; From d9a50b0256f06bd39a1bed1ba40baec37c356b11 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 16 Aug 2017 17:18:17 +0100 Subject: [PATCH 226/253] perf/aux: Ensure aux_wakeup represents most recent wakeup index The aux_watermark member of struct ring_buffer represents the period (in terms of bytes) at which wakeup events should be generated when data is written to the aux buffer in non-snapshot mode. On hardware that cannot generate an interrupt when the aux_head reaches an arbitrary wakeup index (such as ARM SPE), the aux_head sampled from handle->head in perf_aux_output_{skip,end} may in fact be past the wakeup index. This can lead to wakeup slowly falling behind the head. For example, consider the case where hardware can only generate an interrupt on a page-boundary and the aux buffer is initialised as follows: // Buffer size is 2 * PAGE_SIZE rb->aux_head = rb->aux_wakeup = 0 rb->aux_watermark = PAGE_SIZE / 2 following the first perf_aux_output_begin call, the handle is initialised with: handle->head = 0 handle->size = 2 * PAGE_SIZE handle->wakeup = PAGE_SIZE / 2 and the hardware will be programmed to generate an interrupt at PAGE_SIZE. When the interrupt is raised, the hardware head will be at PAGE_SIZE, so calling perf_aux_output_end(handle, PAGE_SIZE) puts the ring buffer into the following state: rb->aux_head = PAGE_SIZE rb->aux_wakeup = PAGE_SIZE / 2 rb->aux_watermark = PAGE_SIZE / 2 and then the next call to perf_aux_output_begin will result in: handle->head = handle->wakeup = PAGE_SIZE for which the semantics are unclear and, for a smaller aux_watermark (e.g. PAGE_SIZE / 4), then the wakeup would in fact be behind head at this point. This patch fixes the problem by rounding down the aux_head (as sampled from the handle) to the nearest aux_watermark boundary when updating rb->aux_wakeup, therefore taking into account any overruns by the hardware. Reported-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1502900297-21839-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- kernel/events/internal.h | 2 +- kernel/events/ring_buffer.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 2941b868353c..5377c591c57a 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -40,7 +40,7 @@ struct ring_buffer { /* AUX area */ long aux_head; local_t aux_nest; - long aux_wakeup; + long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */ unsigned long aux_pgoff; int aux_nr_pages; int aux_overwrite; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 25437fda56e3..af71a84e12ee 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -453,7 +453,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) rb->user_page->aux_head = rb->aux_head; if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { wakeup = true; - rb->aux_wakeup += rb->aux_watermark; + rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); } if (wakeup) { @@ -486,7 +486,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) rb->user_page->aux_head = rb->aux_head; if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { perf_output_wakeup(handle); - rb->aux_wakeup += rb->aux_watermark; + rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); handle->wakeup = rb->aux_wakeup + rb->aux_watermark; } From 95298355143f9765f0d40ed57dce7fa6571cc623 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Aug 2017 15:21:53 -0700 Subject: [PATCH 227/253] perf/x86: Move Nehalem PEBS code to flag Minor cleanup: use an explicit x86_pmu flag to handle the missing Lock / TLB information on Nehalem, instead of always checking the model number for each PEBS sample. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/20170816222156.19953-2-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 1 + arch/x86/events/intel/ds.c | 5 +---- arch/x86/events/perf_event.h | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 98b0f0729527..c3439a36dcf9 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3905,6 +3905,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_nhm(); x86_add_quirk(intel_nehalem_quirk); + x86_pmu.pebs_no_tlb = 1; pr_cont("Nehalem events, "); break; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index a322fed5f8ed..3ccdf8cb4495 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -149,8 +149,6 @@ static u64 load_latency_data(u64 status) { union intel_x86_pebs_dse dse; u64 val; - int model = boot_cpu_data.x86_model; - int fam = boot_cpu_data.x86; dse.val = status; @@ -162,8 +160,7 @@ static u64 load_latency_data(u64 status) /* * Nehalem models do not support TLB, Lock infos */ - if (fam == 0x6 && (model == 26 || model == 30 - || model == 31 || model == 46)) { + if (x86_pmu.pebs_no_tlb) { val |= P(TLB, NA) | P(LOCK, NA); return val; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 476aec3a4cab..2e9636e4068f 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -591,7 +591,8 @@ struct x86_pmu { pebs :1, pebs_active :1, pebs_broken :1, - pebs_prec_dist :1; + pebs_prec_dist :1, + pebs_no_tlb :1; int pebs_record_size; int pebs_buffer_size; void (*drain_pebs)(struct pt_regs *regs); From 6ae5fa61d27dcb055f4198bcf6c8dbbf1bb33f52 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Aug 2017 15:21:54 -0700 Subject: [PATCH 228/253] perf/x86: Fix data source decoding for Skylake Skylake changed the encoding of the PEBS data source field. Some combinations are not available anymore, but some new cases e.g. for L4 cache hit are added. Fix up the conversion table for Skylake, similar as had been done for Nehalem. On Skylake server the encoding for L4 actually means persistent memory. Handle this case too. To properly describe it in the abstracted perf format I had to add some new fields. Since a hit can have only one level add a new field that is an enumeration, not a bit field to describe the level. It can describe any level. Some numbers are also used to describe PMEM and LFB. Also add a new generic remote flag that can be combined with the generic level to signify a remote cache. And there is an extension field for the snoop indication to handle the Forward state. I didn't add a generic flag for hops because it's not needed for Skylake. I changed the existing encodings for older CPUs to also fill in the new level and remote fields. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/20170816222156.19953-3-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 ++ arch/x86/events/intel/ds.c | 51 +++++++++++++++++++++------------ arch/x86/events/perf_event.h | 2 ++ include/uapi/linux/perf_event.h | 30 +++++++++++++++++-- 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index c3439a36dcf9..6f342001ec6a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4208,6 +4208,8 @@ __init int intel_pmu_init(void) skl_format_attr); WARN_ON(!x86_pmu.format_attrs); x86_pmu.cpu_events = hsw_events_attrs; + intel_pmu_pebs_data_source_skl( + boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); pr_cont("Skylake events, "); break; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 3ccdf8cb4495..98e36e0c791c 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -49,34 +49,47 @@ union intel_x86_pebs_dse { */ #define P(a, b) PERF_MEM_S(a, b) #define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) /* Version for Sandy Bridge and later */ static u64 pebs_data_source[] = { - P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ - OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */ - OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ - OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */ - OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */ - OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */ - OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */ - OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */ - OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */ - OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/ - OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */ - OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */ - OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */ - OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */ - OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */ - OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */ + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x04: L3 hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* 0x0c: L3 miss, excl */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */ }; /* Patch up minor differences in the bits */ void __init intel_pmu_pebs_data_source_nhm(void) { - pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT); - pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM); - pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM); + pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); + pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); + pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); +} + +void __init intel_pmu_pebs_data_source_skl(bool pmem) +{ + u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4); + + pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT); + pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT); + pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); + pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD); + pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM); } static u64 precise_store_data(u64 status) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2e9636e4068f..0f7dad8bd358 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -948,6 +948,8 @@ void intel_pmu_lbr_init_knl(void); void intel_pmu_pebs_data_source_nhm(void); +void intel_pmu_pebs_data_source_skl(bool pmem); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 642db5fa3286..2a37ae925d85 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -954,14 +954,20 @@ union perf_mem_data_src { mem_snoop:5, /* snoop mode */ mem_lock:2, /* lock instr */ mem_dtlb:7, /* tlb access */ - mem_rsvd:31; + mem_lvl_num:4, /* memory hierarchy level number */ + mem_remote:1, /* remote */ + mem_snoopx:2, /* snoop mode, ext */ + mem_rsvd:24; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:31, + __u64 mem_rsvd:24, + mem_snoopx:2, /* snoop mode, ext */ + mem_remote:1, /* remote */ + mem_lvl_num:4, /* memory hierarchy level number */ mem_dtlb:7, /* tlb access */ mem_lock:2, /* lock instr */ mem_snoop:5, /* snoop mode */ @@ -998,6 +1004,22 @@ union perf_mem_data_src { #define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ #define PERF_MEM_LVL_SHIFT 5 +#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ +/* 5-0xa available */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + /* snoop mode */ #define PERF_MEM_SNOOP_NA 0x01 /* not available */ #define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ @@ -1006,6 +1028,10 @@ union perf_mem_data_src { #define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ #define PERF_MEM_SNOOP_SHIFT 19 +#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ +/* 1 free */ +#define PERF_MEM_SNOOPX_SHIFT 37 + /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ #define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ From 1d953111b648e48923171c3c9cf17be2250544fa Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 22 Aug 2017 17:59:28 +0200 Subject: [PATCH 229/253] perf/core: Don't report zero PIDs for exiting tasks The exiting/dead task has no PIDs and in this case perf_event_pid/tid() return zero, change them to return -1 to distinguish this case from idle threads. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170822155928.GA6892@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1ac5015bab04..b411321b6c26 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1249,26 +1249,31 @@ unclone_ctx(struct perf_event_context *ctx) return parent_ctx; } -static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) +static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p, + enum pid_type type) { + u32 nr; /* * only top level events have the pid namespace they were created in */ if (event->parent) event = event->parent; - return task_tgid_nr_ns(p, event->ns); + nr = __task_pid_nr_ns(p, type, event->ns); + /* avoid -1 if it is idle thread or runs in another ns */ + if (!nr && !pid_alive(p)) + nr = -1; + return nr; +} + +static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) +{ + return perf_event_pid_type(event, p, __PIDTYPE_TGID); } static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) { - /* - * only top level events have the pid namespace they were created in - */ - if (event->parent) - event = event->parent; - - return task_pid_nr_ns(p, event->ns); + return perf_event_pid_type(event, p, PIDTYPE_PID); } /* From d0618410eced4eb092295fad10312a4545fcdfaf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 22 Aug 2017 19:22:43 +0200 Subject: [PATCH 230/253] tracing, perf: Adjust code layout in get_recursion_context() In an XDP redirect applications using tracepoint xdp:xdp_redirect to diagnose TX overrun, I noticed perf_swevent_get_recursion_context() was consuming 2% CPU. This was reduced to 1.85% with this simple change. Looking at the annotated asm code, it was clear that the unlikely case in_nmi() test was chosen (by the compiler) as the most likely event/branch. This small adjustment makes the compiler (GCC version 7.1.1 20170622 (Red Hat 7.1.1-3)) put in_nmi() as an unlikely branch. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/150342256382.16595.986861478681783732.stgit@firesoul Signed-off-by: Ingo Molnar --- kernel/events/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 5377c591c57a..843e97047335 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -208,7 +208,7 @@ static inline int get_recursion_context(int *recursion) { int rctx; - if (in_nmi()) + if (unlikely(in_nmi())) rctx = 3; else if (in_irq()) rctx = 2; From a5df70c354c26e20d5fd8eb64517f724e97ef0b2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 22 Aug 2017 11:52:00 -0700 Subject: [PATCH 231/253] perf/x86: Only show format attributes when supported Only show the Intel format attributes in sysfs when the feature is actually supported with the current model numbers. This allows programs to probe what format attributes are available, and give a sensible error message to users if they are not. This handles near all cases for intel attributes since Nehalem, except the (obscure) case when the model number if known, but PEBS is disabled in PERF_CAPABILITIES. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170822185201.9261-2-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 48 ++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 6f342001ec6a..b00f1353a026 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3415,12 +3415,26 @@ static struct attribute *intel_arch3_formats_attr[] = { &format_attr_any.attr, &format_attr_inv.attr, &format_attr_cmask.attr, + NULL, +}; + +static struct attribute *hsw_format_attr[] = { &format_attr_in_tx.attr, &format_attr_in_tx_cp.attr, + &format_attr_offcore_rsp.attr, + &format_attr_ldlat.attr, + NULL +}; - &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ - &format_attr_ldlat.attr, /* PEBS load latency */ - NULL, +static struct attribute *nhm_format_attr[] = { + &format_attr_offcore_rsp.attr, + &format_attr_ldlat.attr, + NULL +}; + +static struct attribute *slm_format_attr[] = { + &format_attr_offcore_rsp.attr, + NULL }; static struct attribute *skl_format_attr[] = { @@ -3795,6 +3809,7 @@ __init int intel_pmu_init(void) unsigned int unused; struct extra_reg *er; int version, i; + struct attribute **extra_attr = NULL; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -3906,6 +3921,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_nhm(); x86_add_quirk(intel_nehalem_quirk); x86_pmu.pebs_no_tlb = 1; + extra_attr = nhm_format_attr; pr_cont("Nehalem events, "); break; @@ -3941,6 +3957,7 @@ __init int intel_pmu_init(void) x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.cpu_events = slm_events_attrs; + extra_attr = slm_format_attr; pr_cont("Silvermont events, "); break; @@ -3966,6 +3983,7 @@ __init int intel_pmu_init(void) x86_pmu.lbr_pt_coexist = true; x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.cpu_events = glm_events_attrs; + extra_attr = slm_format_attr; pr_cont("Goldmont events, "); break; @@ -3992,6 +4010,7 @@ __init int intel_pmu_init(void) x86_pmu.cpu_events = glm_events_attrs; /* Goldmont Plus has 4-wide pipeline */ event_attr_td_total_slots_scale_glm.event_str = "4"; + extra_attr = slm_format_attr; pr_cont("Goldmont plus events, "); break; @@ -4021,6 +4040,7 @@ __init int intel_pmu_init(void) X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); intel_pmu_pebs_data_source_nhm(); + extra_attr = nhm_format_attr; pr_cont("Westmere events, "); break; @@ -4057,6 +4077,8 @@ __init int intel_pmu_init(void) intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); + extra_attr = nhm_format_attr; + pr_cont("SandyBridge events, "); break; @@ -4091,6 +4113,8 @@ __init int intel_pmu_init(void) intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); + extra_attr = nhm_format_attr; + pr_cont("IvyBridge events, "); break; @@ -4119,6 +4143,8 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = hsw_get_event_constraints; x86_pmu.cpu_events = hsw_events_attrs; x86_pmu.lbr_double_abort = true; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; pr_cont("Haswell events, "); break; @@ -4155,6 +4181,8 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = hsw_get_event_constraints; x86_pmu.cpu_events = hsw_events_attrs; x86_pmu.limit_period = bdw_limit_period; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; pr_cont("Broadwell events, "); break; @@ -4173,7 +4201,7 @@ __init int intel_pmu_init(void) /* all extra regs are per-cpu when HT is on */ x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - + extra_attr = slm_format_attr; pr_cont("Knights Landing/Mill events, "); break; @@ -4204,9 +4232,9 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, - skl_format_attr); - WARN_ON(!x86_pmu.format_attrs); + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + hsw_format_attr : nhm_format_attr; + extra_attr = merge_attr(extra_attr, skl_format_attr); x86_pmu.cpu_events = hsw_events_attrs; intel_pmu_pebs_data_source_skl( boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); @@ -4229,6 +4257,12 @@ __init int intel_pmu_init(void) } } + if (version >= 2 && extra_attr) { + x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, + extra_attr); + WARN_ON(!x86_pmu.format_attrs); + } + if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); From b00233b5306512a09e339d69ef5e390a77f2d302 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 22 Aug 2017 11:52:01 -0700 Subject: [PATCH 232/253] perf/x86: Export some PMU attributes in caps/ directory It can be difficult to figure out for user programs what features the x86 CPU PMU driver actually supports. Currently it requires grepping in dmesg, but dmesg is not always available. This adds a caps directory to /sys/bus/event_source/devices/cpu/, similar to the caps already used on intel_pt, which can be used to discover the available capabilities cleanly. Three capabilities are defined: - pmu_name: Underlying CPU name known to the driver - max_precise: Max precise level supported - branches: Known depth of LBR. Example: % grep . /sys/bus/event_source/devices/cpu/caps/* /sys/bus/event_source/devices/cpu/caps/branches:32 /sys/bus/event_source/devices/cpu/caps/max_precise:3 /sys/bus/event_source/devices/cpu/caps/pmu_name:skylake Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170822185201.9261-3-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 38 ++++++++++++++------- arch/x86/events/intel/core.c | 66 +++++++++++++++++++++++++++++++++++- arch/x86/events/perf_event.h | 3 ++ 3 files changed, 93 insertions(+), 14 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index af12e294caed..d5f98095a155 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -487,22 +487,28 @@ static inline int precise_br_compat(struct perf_event *event) return m == b; } +int x86_pmu_max_precise(void) +{ + int precise = 0; + + /* Support for constant skid */ + if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { + precise++; + + /* Support for IP fixup */ + if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) + precise++; + + if (x86_pmu.pebs_prec_dist) + precise++; + } + return precise; +} + int x86_pmu_hw_config(struct perf_event *event) { if (event->attr.precise_ip) { - int precise = 0; - - /* Support for constant skid */ - if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { - precise++; - - /* Support for IP fixup */ - if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2) - precise++; - - if (x86_pmu.pebs_prec_dist) - precise++; - } + int precise = x86_pmu_max_precise(); if (event->attr.precise_ip > precise) return -EOPNOTSUPP; @@ -1752,6 +1758,10 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) static struct attribute_group x86_pmu_attr_group; +static struct attribute_group x86_pmu_caps_group = { + .name = "caps", +}; + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1798,6 +1808,7 @@ static int __init init_hw_perf_events(void) 0, x86_pmu.num_counters, 0, 0); x86_pmu_format_group.attrs = x86_pmu.format_attrs; + x86_pmu_caps_group.attrs = x86_pmu.caps_attrs; if (x86_pmu.event_attrs) x86_pmu_events_group.attrs = x86_pmu.event_attrs; @@ -2217,6 +2228,7 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { &x86_pmu_attr_group, &x86_pmu_format_group, &x86_pmu_events_group, + &x86_pmu_caps_group, NULL, }; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index b00f1353a026..8fa2abd9c8b6 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3795,6 +3795,46 @@ static ssize_t freeze_on_smi_store(struct device *cdev, static DEVICE_ATTR_RW(freeze_on_smi); +static ssize_t branches_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr); +} + +static DEVICE_ATTR_RO(branches); + +static struct attribute *lbr_attrs[] = { + &dev_attr_branches.attr, + NULL +}; + +static char pmu_name_str[30]; + +static ssize_t pmu_name_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", pmu_name_str); +} + +static DEVICE_ATTR_RO(pmu_name); + +static ssize_t max_precise_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise()); +} + +static DEVICE_ATTR_RO(max_precise); + +static struct attribute *intel_pmu_caps_attrs[] = { + &dev_attr_pmu_name.attr, + &dev_attr_max_precise.attr, + NULL +}; + static struct attribute *intel_pmu_attrs[] = { &dev_attr_freeze_on_smi.attr, NULL, @@ -3810,6 +3850,7 @@ __init int intel_pmu_init(void) struct extra_reg *er; int version, i; struct attribute **extra_attr = NULL; + char *name; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -3877,6 +3918,7 @@ __init int intel_pmu_init(void) switch (boot_cpu_data.x86_model) { case INTEL_FAM6_CORE_YONAH: pr_cont("Core events, "); + name = "core"; break; case INTEL_FAM6_CORE2_MEROM: @@ -3892,6 +3934,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_core2_event_constraints; x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; pr_cont("Core2 events, "); + name = "core2"; break; case INTEL_FAM6_NEHALEM: @@ -3924,6 +3967,7 @@ __init int intel_pmu_init(void) extra_attr = nhm_format_attr; pr_cont("Nehalem events, "); + name = "nehalem"; break; case INTEL_FAM6_ATOM_PINEVIEW: @@ -3940,6 +3984,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_core2; pr_cont("Atom events, "); + name = "bonnell"; break; case INTEL_FAM6_ATOM_SILVERMONT1: @@ -3959,6 +4004,7 @@ __init int intel_pmu_init(void) x86_pmu.cpu_events = slm_events_attrs; extra_attr = slm_format_attr; pr_cont("Silvermont events, "); + name = "silvermont"; break; case INTEL_FAM6_ATOM_GOLDMONT: @@ -3985,6 +4031,7 @@ __init int intel_pmu_init(void) x86_pmu.cpu_events = glm_events_attrs; extra_attr = slm_format_attr; pr_cont("Goldmont events, "); + name = "goldmont"; break; case INTEL_FAM6_ATOM_GEMINI_LAKE: @@ -4012,6 +4059,7 @@ __init int intel_pmu_init(void) event_attr_td_total_slots_scale_glm.event_str = "4"; extra_attr = slm_format_attr; pr_cont("Goldmont plus events, "); + name = "goldmont_plus"; break; case INTEL_FAM6_WESTMERE: @@ -4042,6 +4090,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_nhm(); extra_attr = nhm_format_attr; pr_cont("Westmere events, "); + name = "westmere"; break; case INTEL_FAM6_SANDYBRIDGE: @@ -4080,6 +4129,7 @@ __init int intel_pmu_init(void) extra_attr = nhm_format_attr; pr_cont("SandyBridge events, "); + name = "sandybridge"; break; case INTEL_FAM6_IVYBRIDGE: @@ -4116,6 +4166,7 @@ __init int intel_pmu_init(void) extra_attr = nhm_format_attr; pr_cont("IvyBridge events, "); + name = "ivybridge"; break; @@ -4146,6 +4197,7 @@ __init int intel_pmu_init(void) extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; pr_cont("Haswell events, "); + name = "haswell"; break; case INTEL_FAM6_BROADWELL_CORE: @@ -4184,6 +4236,7 @@ __init int intel_pmu_init(void) extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; pr_cont("Broadwell events, "); + name = "broadwell"; break; case INTEL_FAM6_XEON_PHI_KNL: @@ -4203,6 +4256,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_NO_HT_SHARING; extra_attr = slm_format_attr; pr_cont("Knights Landing/Mill events, "); + name = "knights-landing"; break; case INTEL_FAM6_SKYLAKE_MOBILE: @@ -4239,6 +4293,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_skl( boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); pr_cont("Skylake events, "); + name = "skylake"; break; default: @@ -4246,6 +4301,7 @@ __init int intel_pmu_init(void) case 1: x86_pmu.event_constraints = intel_v1_event_constraints; pr_cont("generic architected perfmon v1, "); + name = "generic_arch_v1"; break; default: /* @@ -4253,10 +4309,13 @@ __init int intel_pmu_init(void) */ x86_pmu.event_constraints = intel_gen_event_constraints; pr_cont("generic architected perfmon, "); + name = "generic_arch_v2+"; break; } } + snprintf(pmu_name_str, sizeof pmu_name_str, "%s", name); + if (version >= 2 && extra_attr) { x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, extra_attr); @@ -4309,8 +4368,13 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } - if (x86_pmu.lbr_nr) + x86_pmu.caps_attrs = intel_pmu_caps_attrs; + + if (x86_pmu.lbr_nr) { + x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs); pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); + } + /* * Access extra MSR may cause #GP under certain circumstances. * E.g. KVM doesn't support offcore event diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 0f7dad8bd358..9337589014cc 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -558,6 +558,7 @@ struct x86_pmu { int attr_rdpmc; struct attribute **format_attrs; struct attribute **event_attrs; + struct attribute **caps_attrs; ssize_t (*events_sysfs_show)(char *page, u64 config); struct attribute **cpu_events; @@ -742,6 +743,8 @@ int x86_reserve_hardware(void); void x86_release_hardware(void); +int x86_pmu_max_precise(void); + void hw_perf_lbr_event_destroy(struct perf_event *event); int x86_setup_perfctr(struct perf_event *event); From 726647d0526c5c2f3472010677122b89d9e4ef88 Mon Sep 17 00:00:00 2001 From: Jack Henschel Date: Thu, 24 Aug 2017 15:20:22 +0200 Subject: [PATCH 233/253] perf stat: Fix path to PMU formats in documentation As defined in tools/perf/util/pmu.c, the EVENT_SOURCE_DEVICE_PATH is /sys/bus/event_source/devices/ (no traling 's' in event_source) This patch corrects the path in the perf stat documentation Signed-off-by: Jack Henschel Cc: Alexander Shishkin Cc: Jack Henschel Cc: Peter Zijlstra Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/20170824132022.10934-1-jackdev@mailbox.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 698076313606..c37d61682dfb 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -41,13 +41,13 @@ report:: - a symbolically formed event like 'pmu/param1=0x3,param2/' where param1 and param2 are defined as formats for the PMU in - /sys/bus/event_sources/devices//format/* + /sys/bus/event_source/devices//format/* - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' where M, N, K are numbers (in decimal, hex, octal format). Acceptable values for each of 'config', 'config1' and 'config2' parameters are defined by corresponding entries in - /sys/bus/event_sources/devices//format/* + /sys/bus/event_source/devices//format/* -i:: --no-inherit:: From 6bd76b8fabe157233e498931c3f9298ee7128a28 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 25 Aug 2017 15:45:10 -0300 Subject: [PATCH 234/253] perf tools: Fix static build with newer toolchains We can't pass --dynamic-list list into static build anymore, because compilers starts to scream about that. Fedora 26 started to fail build with following error: $ make LDFLAGS=-static ... /usr/bin/ld: dynamic STT_GNU_IFUNC symbol `strcmp' with pointer equality in `/usr/lib/gcc/x86_64-redhat-linux/7/../../../../lib64/libc.a(strcmp.o +)' can not be used when making an executable; recompile with -fPIE and relink with -pie There's no sense for --dynamic-list in static build, because there's no .dynsym table in static binary. Consequently the traceevent plugins have never worked with static build, but it was quietly passed by. To fix this in future I think we should add support to compile plugins within the perf binary directly for static build. Reported-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Link: http://lkml.kernel.org/n/tip-jeg6a7ff9j9hlqn8k4gllzvv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 70ddc65f898d..a700a079a218 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -279,7 +279,13 @@ LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list -LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) + +# +# The static build has no dynsym table, so this does not work for +# static build. Looks like linker starts to scream about that now +# (in Fedora 26) so we need to switch it off for static build. +DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) +LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS)) LIBAPI = $(API_PATH)libapi.a export LIBAPI From 12c15302dd4b768105d4b7a487ed4858ccab94fc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 10:57:32 +0200 Subject: [PATCH 235/253] perf c2c: Fix remote HITM detection for Skylake Skylake introduced new mem_remote bit in union perf_mem_data_src [1]. It applies to any other memory level to express Remote unknown level, as is reported by Skylake. Adding this extra check to c2c_decode_stats to properly decode remote HITMs on Skylake. [1] http://lkml.kernel.org/r/20170816222156.19953-4-andi@firstfloor.org Signed-off-by: Jiri Olsa Acked-by: Andi Kleen Cc: Alexander Shishkin Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824085732.28481-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ced4f3fff035..28afe5fa84d6 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -316,6 +316,11 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) u64 lvl = data_src->mem_lvl; u64 snoop = data_src->mem_snoop; u64 lock = data_src->mem_lock; + /* + * Skylake might report unknown remote level via this + * bit, consider it when evaluating remote HITMs. + */ + bool mrem = data_src->mem_remote; int err = 0; #define HITM_INC(__f) \ @@ -361,7 +366,8 @@ do { \ } if ((lvl & P(LVL, REM_RAM1)) || - (lvl & P(LVL, REM_RAM2))) { + (lvl & P(LVL, REM_RAM2)) || + mrem) { stats->rmt_dram++; if (snoop & P(SNOOP, HIT)) stats->ld_shared++; @@ -371,7 +377,8 @@ do { \ } if ((lvl & P(LVL, REM_CCE1)) || - (lvl & P(LVL, REM_CCE2))) { + (lvl & P(LVL, REM_CCE2)) || + mrem) { if (snoop & P(SNOOP, HIT)) stats->rmt_hit++; else if (snoop & P(SNOOP, HITM)) From a17f06978769735ab5c7598c46881fa201e9b1a2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:31 +0200 Subject: [PATCH 236/253] perf record: Set read_format for inherit_stat Set read_format for what we expect to get from read event generated by perf_event_attr::inherit_stat. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a5888c704e01..d9bd632ed7db 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -902,8 +902,13 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, if (opts->no_samples) attr->sample_freq = 0; - if (opts->inherit_stat) + if (opts->inherit_stat) { + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING | + PERF_FORMAT_ID; attr->inherit_stat = 1; + } if (opts->sample_address) { perf_evsel__set_sample_bit(evsel, ADDR); From dac7f6b7ed1c8601358357f60e9764a4c6a68d71 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:32 +0200 Subject: [PATCH 237/253] perf report: Add dump_read function Adding dump_read function to gather all the dump output of read function. Adding output of enabled and running times and id if enabled (3 new lines with '...' prefix below). $ perf record -s ... $ perf report -D 958358311769 0x91f8 [0x40]: PERF_RECORD_READ: 3339 3339 cycles:u 0 ... time enabled : 958358313731 ... time running : 958358313731 ... id : 80 Committer note: Do not use 'read' as a variable name as it breaks the build on older systems, such as RHEL6: CC /tmp/build/perf/util/session.o cc1: warnings being treated as errors util/session.c: In function 'dump_read': util/session.c:1132: error: declaration of 'read' shadows a global declaration /usr/include/bits/unistd.h:35: error: shadowed declaration is here mv: cannot stat `/tmp/build/perf/util/.session.o.tmp': No such file or directory Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 ---- tools/perf/util/session.c | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bace3429c030..9e4004b08f55 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -249,10 +249,6 @@ static int process_read_event(struct perf_tool *tool, return err; } - dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, - evsel ? perf_evsel__name(evsel) : "FAIL", - event->read.value); - return 0; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dc453f84a14c..ac863691605f 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1127,6 +1127,30 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->attr.read_format); } +static void dump_read(struct perf_evsel *evsel, union perf_event *event) +{ + struct read_event *read_event = &event->read; + u64 read_format; + + if (!dump_trace) + return; + + printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, + evsel ? perf_evsel__name(evsel) : "FAIL", + event->read.value); + + read_format = evsel->attr.read_format; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + printf("... time enabled : %" PRIu64 "\n", read_event->time_enabled); + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + printf("... time running : %" PRIu64 "\n", read_event->time_running); + + if (read_format & PERF_FORMAT_ID) + printf("... id : %" PRIu64 "\n", read_event->id); +} + static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) @@ -1271,6 +1295,7 @@ static int machines__deliver_event(struct machines *machines, evlist->stats.total_lost_samples += event->lost_samples.lost; return tool->lost_samples(tool, event, sample, machine); case PERF_RECORD_READ: + dump_read(evsel, event); return tool->read(tool, event, sample, evsel, machine); case PERF_RECORD_THROTTLE: return tool->throttle(tool, event, sample, machine); From 64eed1deb6d87f4c0efe03297f50367a3689eb56 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:33 +0200 Subject: [PATCH 238/253] perf values: Fix thread index bug We are taking wrong index (+1) for first thread, which leaves thread with index 0 unused and uninitialized. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 5de2e15e2eda..9ac36bf2c438 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -98,7 +98,7 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, return i; } - i = values->threads + 1; + i = values->threads; values->value[i] = malloc(values->counters_max * sizeof(**values->value)); if (!values->value[i]) { pr_debug("failed to allocate read_values counters array"); @@ -106,7 +106,7 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, } values->pid[i] = pid; values->tid[i] = tid; - values->threads = i; + values->threads = i + 1; return i; } From f4ef3b7c184c4c269f953f226f7158347d007622 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:34 +0200 Subject: [PATCH 239/253] perf values: Fix allocation check Bailing out in case the allocation failed, not the other way round. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 9ac36bf2c438..2c4af02f08cd 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -131,7 +131,7 @@ static int perf_read_values__enlarge_counters(struct perf_read_values *values) for (i = 0; i < values->threads; i++) { u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value)); - if (value) { + if (!value) { pr_debug("failed to enlarge read_values ->values array"); goto out_free_name; } From a1834fc938344dd3015a1df64ee7f2af70ded147 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:35 +0200 Subject: [PATCH 240/253] perf values: Zero value buffers We need to make sure the array of value pointers are zero initialized, because we use them in realloc later on and uninitialized non zero value will cause allocation error and aborted execution. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 2c4af02f08cd..3b56aeaa8cbb 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -12,7 +12,7 @@ int perf_read_values_init(struct perf_read_values *values) values->threads_max = 16; values->pid = malloc(values->threads_max * sizeof(*values->pid)); values->tid = malloc(values->threads_max * sizeof(*values->tid)); - values->value = malloc(values->threads_max * sizeof(*values->value)); + values->value = zalloc(values->threads_max * sizeof(*values->value)); if (!values->pid || !values->tid || !values->value) { pr_debug("failed to allocate read_values threads arrays"); goto out_free_pid; @@ -99,7 +99,8 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, } i = values->threads; - values->value[i] = malloc(values->counters_max * sizeof(**values->value)); + + values->value[i] = zalloc(values->counters_max * sizeof(**values->value)); if (!values->value[i]) { pr_debug("failed to allocate read_values counters array"); return -ENOMEM; @@ -130,12 +131,16 @@ static int perf_read_values__enlarge_counters(struct perf_read_values *values) for (i = 0; i < values->threads; i++) { u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value)); + int j; if (!value) { pr_debug("failed to enlarge read_values ->values array"); goto out_free_name; } + for (j = values->counters_max; j < counters_max; j++) + value[j] = 0; + values->value[i] = value; } From 9933183e365f7dd3a79507f1ffb4bcf9433a73ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:36 +0200 Subject: [PATCH 241/253] perf report: Group stat values on global event id There's no big value on displaying counts for every event ID, which is one per every CPU. Rather than that, displaying the whole sum for the event. $ perf record -c 100000 -e cycles:u -s test $ perf report -T Before: # PID TID cycles:u cycles:u cycles:u cycles:u ... [20 more columns of 'cycles:u'] 3339 3339 0 0 0 0 3340 3340 0 0 0 0 3341 3341 0 0 0 0 3342 3342 0 0 0 0 Now: # PID TID cycles:u 3339 3339 19678 3340 3340 18744 3341 3341 17335 3342 3342 26414 Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- tools/perf/util/values.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9e4004b08f55..f9dff652dcbd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -241,7 +241,7 @@ static int process_read_event(struct perf_tool *tool, const char *name = evsel ? perf_evsel__name(evsel) : "unknown"; int err = perf_read_values_add_value(&rep->show_threads_values, event->read.pid, event->read.tid, - event->read.id, + evsel->idx, name, event->read.value); diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 3b56aeaa8cbb..8a32bb0095e5 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -192,7 +192,7 @@ int perf_read_values_add_value(struct perf_read_values *values, if (cindex < 0) return cindex; - values->value[tindex][cindex] = value; + values->value[tindex][cindex] += value; return 0; } From ba5d1a48aab56a2677113d071b5b1446877b9a1a Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:37 -0700 Subject: [PATCH 242/253] tools build tests: Don't hardcode gcc name Use $(CC) instead of harcoded gcc binary name. Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-2-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/tests/ex/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile index c50d5782ad5a..027d6c8a58a7 100644 --- a/tools/build/tests/ex/Makefile +++ b/tools/build/tests/ex/Makefile @@ -8,7 +8,7 @@ ex: include $(srctree)/tools/build/Makefile.include ex: ex-in.o libex-in.o - gcc -o $@ $^ + $(CC) -o $@ $^ ex.%: fixdep FORCE make -f $(srctree)/tools/build/Makefile.build dir=. $@ From 39a59f1e3ea541035637432db39158a461f29146 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:38 -0700 Subject: [PATCH 243/253] perf tools: Allow external definition of flex and bison binary names Allow user to define flex and bison binary names by passing FLEX and BISON variables. Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-3-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a700a079a218..58924eb0f40b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -164,8 +164,8 @@ LN = ln -f MKDIR = mkdir FIND = find INSTALL = install -FLEX = flex -BISON = bison +FLEX ?= flex +BISON ?= bison STRIP = strip AWK = awk From 12024aacb0170779cd0b976b06d2e9b1767cf142 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:39 -0700 Subject: [PATCH 244/253] tools lib: Allow external definition of CC, AR and LD Use already defined values for CC, AR and LD when available. Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-4-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index eb6e0b36bfc1..2538675731c7 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -8,9 +8,9 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) #$(info Determined 'srctree' to be $(srctree)) endif -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar -LD = $(CROSS_COMPILE)ld +CC ?= $(CROSS_COMPILE)gcc +AR ?= $(CROSS_COMPILE)ar +LD ?= $(CROSS_COMPILE)ld MAKEFLAGS += --no-print-directory From 3866058ef15b6ae6f4ff48e088428b46bcc43fa1 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:40 -0700 Subject: [PATCH 245/253] perf tools: Robustify detection of clang binary Prior to this patch, make scripts tested for CLANG with ifeq ($(CC), clang), failing to detect CLANG binaries with different names. Fix it by testing for the existence of __clang__ macro in the list of compiler defined macros. Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-5-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 2 +- tools/lib/subcmd/Makefile | 2 +- tools/perf/Makefile.config | 4 ++-- tools/perf/util/intel-pt-decoder/Build | 2 +- tools/scripts/Makefile.include | 4 +++- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 2538675731c7..4563ba7ede6f 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -19,7 +19,7 @@ LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC -ifeq ($(CC), clang) +ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 3d1c3b5b5150..7e9f03c97e4c 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -21,7 +21,7 @@ LIBFILE = $(OUTPUT)libsubcmd.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC -ifeq ($(CC), clang) +ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6a64c6bbd9a5..63f534a0902f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -148,7 +148,7 @@ ifndef DEBUG endif ifeq ($(DEBUG),0) -ifeq ($(CC), clang) +ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 @@ -184,7 +184,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - ifeq ($(CC), clang) + ifeq ($(CC_NO_CLANG), 1) PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) endif FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 7aca5d6d7e1f..10e0814bb8d2 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -25,6 +25,6 @@ $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/in CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -ifneq ($(CC), clang) +ifeq ($(CC_NO_CLANG), 1) CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init endif diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index ccad8ce925e4..1e8b6116ba3c 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -39,7 +39,9 @@ EXTRA_WARNINGS += -Wundef EXTRA_WARNINGS += -Wwrite-strings EXTRA_WARNINGS += -Wformat -ifneq ($(CC), clang) +CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?) + +ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 endif From 70ff7c6caa2f2cee4a7621f5cb3b73b0a38327f1 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:42 -0700 Subject: [PATCH 246/253] perf tools: Pass full path of FEATURES_DUMP When building with an external FEATURES_DUMP, bpf complains that features dump file is not found. Fix it by passing full file path. Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-7-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 58924eb0f40b..a5bf3100f812 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -240,7 +240,7 @@ endif ifeq ($(FEATURES_DUMP),) FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP) else -FEATURE_DUMP_EXPORT := $(FEATURES_DUMP) +FEATURE_DUMP_EXPORT := $(realpath $(FEATURES_DUMP)) endif export prefix bindir sharedir sysconfdir DESTDIR From a2105f8a9c38f8298f501cf1cd3218407ecdb1a1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Aug 2017 11:26:14 -0300 Subject: [PATCH 247/253] tools headers: Sync cpu features kernel ABI headers with tooling headers These changes made the tools/arch/x86/include/ headers to drift from its kernel origins: 910448bbed06 ("perf/x86/amd/uncore: Rename cpufeatures macro for cache counters") 5442c2699552 ("x86/cpufeature, kvm/svm: Rename (shorten) the new "virtualized VMSAVE/VMLOAD" CPUID flag") cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels") Which was detected while building perf: make: Entering directory '/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j4' parallel build Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h' Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' This sync causes just these perf object files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And the changes in the above changesets don't entail any need for change in the above 'perf bench' files. Cc: Adrian Hunter Cc: Andy Lutomirski Cc: Borislav Petkov Cc: David Ahern Cc: Janakarajan Natarajan Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-456aafouj911a4x4zwt8stkm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 5 ++--- tools/arch/x86/include/asm/disabled-features.h | 4 +--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 14f0f2913364..8ea315a11fe0 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -177,7 +177,7 @@ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ #define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ #define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ #define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ /* @@ -196,7 +196,6 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ @@ -287,7 +286,7 @@ #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index c10c9128f54e..5dff775af7cd 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -21,13 +21,11 @@ # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) -# define DISABLE_PCID 0 #else # define DISABLE_VME 0 # define DISABLE_K6_MTRR 0 # define DISABLE_CYRIX_ARR 0 # define DISABLE_CENTAUR_MCR 0 -# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -51,7 +49,7 @@ #define DISABLED_MASK1 0 #define DISABLED_MASK2 0 #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) -#define DISABLED_MASK4 (DISABLE_PCID) +#define DISABLED_MASK4 0 #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 #define DISABLED_MASK7 0 From 83bc9c371eaf21466f43b12d942b66c3f0d60ae5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Aug 2017 11:47:11 -0300 Subject: [PATCH 248/253] perf trace beauty: Beautify pkey_{alloc,free,mprotect} arguments Reuse 'mprotect' beautifiers for 'pkey_mprotect'. System wide tracing pkey_alloc, pkey_free and pkey_mprotect calls, with backtraces: # perf trace -e pkey_alloc,pkey_mprotect,pkey_free --max-stack=5 0.000 ( 0.011 ms): pkey/7818 pkey_alloc(init_val: DISABLE_ACCESS|DISABLE_WRITE) = -1 EINVAL Invalid argument syscall (/usr/lib64/libc-2.25.so) pkey_alloc (/home/acme/c/pkey) 0.022 ( 0.003 ms): pkey/7818 pkey_mprotect(start: 0x7f28c3890000, len: 4096, prot: READ|WRITE, pkey: -1) = 0 syscall (/usr/lib64/libc-2.25.so) pkey_mprotect (/home/acme/c/pkey) 0.030 ( 0.002 ms): pkey/7818 pkey_free(pkey: -1 ) = -1 EINVAL Invalid argument syscall (/usr/lib64/libc-2.25.so) pkey_free (/home/acme/c/pkey) The tools/include/uapi/asm-generic/mman-common.h file is used to find the access rights defines for the pkey_alloc syscall second argument. Since we have the detector of changes for the tools/include header files versus its kernel origin (include/uapi/asm-generic/mman-common.h), we'll get whatever new flag appears for that argument automatically. This method should be used in other cases where it is easy to generate those flags tables because the header has properly namespaced defines like PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Dave Hansen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-3xq5312qlks7wtfzv2sk3nct@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 12 ++++- tools/perf/builtin-trace.c | 8 +++ tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 3 ++ tools/perf/trace/beauty/pkey_alloc.c | 50 +++++++++++++++++++ .../trace/beauty/pkey_alloc_access_rights.sh | 10 ++++ 6 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 tools/perf/trace/beauty/pkey_alloc.c create mode 100755 tools/perf/trace/beauty/pkey_alloc_access_rights.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a5bf3100f812..91ef44bfaf3e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -387,7 +387,8 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) -beauty_ioctl_outdir := $(OUTPUT)trace/beauty/generated/ioctl +beauty_outdir := $(OUTPUT)trace/beauty/generated +beauty_ioctl_outdir := $(beauty_outdir)/ioctl drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c drm_hdr_dir := $(srctree)/tools/include/uapi/drm drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh @@ -398,6 +399,13 @@ _dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_ou $(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl) $(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@ +pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c +asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/ +pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh + +$(pkey_alloc_access_rights_array): $(asm_generic_hdr_dir)/mman-common.h $(pkey_alloc_access_rights_tbl) + $(Q)$(SHELL) '$(pkey_alloc_access_rights_tbl)' $(asm_generic_hdr_dir) > $@ + sndrv_ctl_ioctl_array := $(beauty_ioctl_outdir)/sndrv_ctl_ioctl_array.c sndrv_ctl_hdr_dir := $(srctree)/tools/include/uapi/sound sndrv_ctl_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh @@ -528,6 +536,7 @@ __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \ + $(pkey_alloc_access_rights_array) \ $(sndrv_pcm_ioctl_array) \ $(sndrv_ctl_ioctl_array) \ $(kvm_ioctl_array) \ @@ -803,6 +812,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ $(OUTPUT)$(drm_ioctl_array) \ + $(OUTPUT)$(pkey_alloc_access_rights_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ $(OUTPUT)$(sndrv_pcm_ioctl_array) \ $(OUTPUT)$(kvm_ioctl_array) \ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 91905839e386..d59cdadf3a79 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -693,6 +693,14 @@ static struct syscall_fmt { [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, { .name = "pipe2", .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, }, + { .name = "pkey_alloc", + .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, }, + { .name = "pkey_free", + .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, }, + { .name = "pkey_mprotect", + .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, + [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, + [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, }, { .name = "poll", .timeout = true, }, { .name = "ppoll", .timeout = true, }, { .name = "pread", .alias = "pread64", }, diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 6f3f159f97e0..175d633c6b49 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -3,4 +3,5 @@ libperf-y += fcntl.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) libperf-y += ioctl.o endif +libperf-y += pkey_alloc.o libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 47a36a8eb842..4b58581a6053 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -78,6 +78,9 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd +size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights + size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c new file mode 100644 index 000000000000..2ba784a3734a --- /dev/null +++ b/tools/perf/trace/beauty/pkey_alloc.c @@ -0,0 +1,50 @@ +/* + * trace/beauty/pkey_alloc.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include +#include + +static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size) +{ + int i, printed = 0; + +#include "trace/beauty/generated/pkey_alloc_access_rights_array.c" + static DEFINE_STRARRAY(pkey_alloc_access_rights); + + if (access_rights == 0) { + const char *s = strarray__pkey_alloc_access_rights.entries[0]; + if (s) + return scnprintf(bf, size, "%s", s); + return scnprintf(bf, size, "%d", 0); + } + + for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) { + int bit = 1 << (i - 1); + + if (!(access_rights & bit)) + continue; + + if (printed != 0) + printed += scnprintf(bf + printed, size - printed, "|"); + + if (strarray__pkey_alloc_access_rights.entries[i] != NULL) + printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]); + else + printed += scnprintf(bf + printed, size - printed, "0x%#", bit); + } + + return printed; +} + +size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long cmd = arg->val; + + return pkey_alloc__scnprintf_access_rights(cmd, bf, size); +} diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh new file mode 100755 index 000000000000..62e51a02b839 --- /dev/null +++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +header_dir=$1 + +printf "static const char *pkey_alloc_access_rights[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+PKEY_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*' +egrep $regex ${header_dir}/mman-common.h | \ + sed -r "s/$regex/\2 \2 \1/g" | \ + sort | xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n" +printf "};\n" From 8d4e6c4caa12dafbcba138e5450b7af17b0b2194 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 30 Mar 2017 18:39:56 +0300 Subject: [PATCH 249/253] perf/core, pt, bts: Get rid of itrace_started I just noticed that hw.itrace_started and hw.config are aliased to the same location. Now, the PT driver happens to use both, which works out fine by sheer luck: - STORE(hw.itrace_start) is ordered before STORE(hw.config), in the program order, although there are no compiler barriers to ensure that, - to the perf_log_itrace_start() hw.itrace_start looks set at the same time as when it is intended to be set because both stores happen in the same path, - hw.config is never reset to zero in the PT driver. Now, the use of hw.config by the PT driver makes more sense (it being a HW PMU) than messing around with itrace_started, which is an awkward API to begin with. This patch replaces hw.itrace_started with an attach_state bit and an API call for the PMU drivers to use to communicate the condition. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20170330153956.25994-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 2 +- arch/x86/events/intel/pt.c | 5 +++-- include/linux/perf_event.h | 5 ++--- kernel/events/core.c | 7 ++++++- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index ddd8d3516bfc..16076eb34699 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -268,7 +268,7 @@ static void bts_event_start(struct perf_event *event, int flags) bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum; bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold; - event->hw.itrace_started = 1; + perf_event_itrace_started(event); event->hw.state = 0; __bts_event_start(event); diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index ae8324d65e61..81fd41d5a0d9 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -471,8 +471,9 @@ static void pt_config(struct perf_event *event) struct pt *pt = this_cpu_ptr(&pt_ctx); u64 reg; - if (!event->hw.itrace_started) { - event->hw.itrace_started = 1; + /* First round: clear STATUS, in particular the PSB byte counter. */ + if (!event->hw.config) { + perf_event_itrace_started(event); wrmsrl(MSR_IA32_RTIT_STATUS, 0); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c00cd4b02f32..adda0aaae6c8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -147,9 +147,6 @@ struct hw_perf_event { struct list_head cqm_groups_entry; struct list_head cqm_group_entry; }; - struct { /* itrace */ - int itrace_started; - }; struct { /* amd_power */ u64 pwr_acc; u64 ptsc; @@ -541,6 +538,7 @@ struct swevent_hlist { #define PERF_ATTACH_GROUP 0x02 #define PERF_ATTACH_TASK 0x04 #define PERF_ATTACH_TASK_DATA 0x08 +#define PERF_ATTACH_ITRACE 0x10 struct perf_cgroup; struct ring_buffer; @@ -864,6 +862,7 @@ extern int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size); extern void *perf_get_aux(struct perf_output_handle *handle); extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); +extern void perf_event_itrace_started(struct perf_event *event); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); diff --git a/kernel/events/core.c b/kernel/events/core.c index e5467e107624..77fd6b11ef22 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7301,6 +7301,11 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_output_end(&handle); } +void perf_event_itrace_started(struct perf_event *event) +{ + event->attach_state |= PERF_ATTACH_ITRACE; +} + static void perf_log_itrace_start(struct perf_event *event) { struct perf_output_handle handle; @@ -7316,7 +7321,7 @@ static void perf_log_itrace_start(struct perf_event *event) event = event->parent; if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) || - event->hw.itrace_started) + event->attach_state & PERF_ATTACH_ITRACE) return; rec.header.type = PERF_RECORD_ITRACE_START; From fc7ce9c74c3ad232b084d80148654f926d01ece7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 28 Aug 2017 20:52:49 -0400 Subject: [PATCH 250/253] perf/core, x86: Add PERF_SAMPLE_PHYS_ADDR For understanding how the workload maps to memory channels and hardware behavior, it's very important to collect address maps with physical addresses. For example, 3D XPoint access can only be found by filtering the physical address. Add a new sample type for physical address. perf already has a facility to collect data virtual address. This patch introduces a function to convert the virtual address to physical address. The function is quite generic and can be extended to any architecture as long as a virtual address is provided. - For kernel direct mapping addresses, virt_to_phys is used to convert the virtual addresses to physical address. - For user virtual addresses, __get_user_pages_fast is used to walk the pages tables for user physical address. - This does not work for vmalloc addresses right now. These are not resolved, but code to do that could be added. The new sample type requires collecting the virtual address. The virtual address will not be output unless SAMPLE_ADDR is applied. For security, the physical address can only be exposed to root or privileged user. Tested-by: Madhavan Srinivasan Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: mpe@ellerman.id.au Link: http://lkml.kernel.org/r/1503967969-48278-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/powerpc/perf/core-book3s.c | 3 ++- arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 2 +- include/linux/perf_event.h | 2 ++ include/uapi/linux/perf_event.h | 4 ++- kernel/events/core.c | 46 +++++++++++++++++++++++++++++++++ 6 files changed, 55 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6c2d4168daec..2e3eb7431571 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2039,7 +2039,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, ~0ULL, event->hw.last_period); - if (event->attr.sample_type & PERF_SAMPLE_ADDR) + if (event->attr.sample_type & + (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) perf_get_data_addr(regs, &data.addr); if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 98e36e0c791c..e1965e5ff570 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1185,7 +1185,7 @@ static void setup_pebs_sample_data(struct perf_event *event, else regs->flags &= ~PERF_EFLAGS_EXACT; - if ((sample_type & PERF_SAMPLE_ADDR) && + if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) && x86_pmu.intel_cap.pebs_format >= 1) data->addr = pebs->dla; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 9337589014cc..4196f81ec0e1 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -91,7 +91,7 @@ struct amd_nb { (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ - PERF_SAMPLE_TRANSACTION) + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR) /* * A debug store configuration. diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index adda0aaae6c8..718ba163c1b9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -943,6 +943,8 @@ struct perf_sample_data { struct perf_regs regs_intr; u64 stack_user_size; + + u64 phys_addr; } ____cacheline_aligned; /* default value for data source */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 2a37ae925d85..140ae638cfd6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -139,8 +139,9 @@ enum perf_event_sample_format { PERF_SAMPLE_IDENTIFIER = 1U << 16, PERF_SAMPLE_TRANSACTION = 1U << 17, PERF_SAMPLE_REGS_INTR = 1U << 18, + PERF_SAMPLE_PHYS_ADDR = 1U << 19, - PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ }; /* @@ -814,6 +815,7 @@ enum perf_event_type { * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR + * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR * }; */ PERF_RECORD_SAMPLE = 9, diff --git a/kernel/events/core.c b/kernel/events/core.c index 77fd6b11ef22..ce64f3fed5c6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1575,6 +1575,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) if (sample_type & PERF_SAMPLE_TRANSACTION) size += sizeof(data->txn); + if (sample_type & PERF_SAMPLE_PHYS_ADDR) + size += sizeof(data->phys_addr); + event->header_size = size; } @@ -6017,6 +6020,9 @@ void perf_output_sample(struct perf_output_handle *handle, } } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) + perf_output_put(handle, data->phys_addr); + if (!event->attr.watermark) { int wakeup_events = event->attr.wakeup_events; @@ -6032,6 +6038,38 @@ void perf_output_sample(struct perf_output_handle *handle, } } +static u64 perf_virt_to_phys(u64 virt) +{ + u64 phys_addr = 0; + struct page *p = NULL; + + if (!virt) + return 0; + + if (virt >= TASK_SIZE) { + /* If it's vmalloc()d memory, leave phys_addr as 0 */ + if (virt_addr_valid((void *)(uintptr_t)virt) && + !(virt >= VMALLOC_START && virt < VMALLOC_END)) + phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt); + } else { + /* + * Walking the pages tables for user address. + * Interrupts are disabled, so it prevents any tear down + * of the page tables. + * Try IRQ-safe __get_user_pages_fast first. + * If failed, leave phys_addr as 0. + */ + if ((current->mm != NULL) && + (__get_user_pages_fast(virt, 1, 0, &p) == 1)) + phys_addr = page_to_phys(p) + virt % PAGE_SIZE; + + if (p) + put_page(p); + } + + return phys_addr; +} + void perf_prepare_sample(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event, @@ -6150,6 +6188,9 @@ void perf_prepare_sample(struct perf_event_header *header, header->size += size; } + + if (sample_type & PERF_SAMPLE_PHYS_ADDR) + data->phys_addr = perf_virt_to_phys(data->addr); } static void __always_inline @@ -9909,6 +9950,11 @@ SYSCALL_DEFINE5(perf_event_open, return -EINVAL; } + /* Only privileged users can get physical addresses */ + if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) && + perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!attr.sample_max_stack) attr.sample_max_stack = sysctl_perf_event_max_stack; From 5da382eb6ea37e2c49ef521c636d73f6ecc3fa81 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Aug 2017 12:46:50 +0200 Subject: [PATCH 251/253] perf/x86: Fix caps/ for !Intel Move the 'max_precise' capability into generic x86 code where it belongs. This fixes a sysfs splat on !Intel systems where we fail to set x86_pmu_caps_group.atts. Reported-and-tested-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hpa@zytor.com Fixes: 22688d1c20f5 ("x86/perf: Export some PMU attributes in caps/ directory") Link: http://lkml.kernel.org/r/20170828104650.2u3rsim4jafyjzv2@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 33 ++++++++++++++++++++++++++++----- arch/x86/events/intel/core.c | 14 ++------------ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index d5f98095a155..73a6311c8baa 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1757,10 +1757,7 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) } static struct attribute_group x86_pmu_attr_group; - -static struct attribute_group x86_pmu_caps_group = { - .name = "caps", -}; +static struct attribute_group x86_pmu_caps_group; static int __init init_hw_perf_events(void) { @@ -1808,7 +1805,14 @@ static int __init init_hw_perf_events(void) 0, x86_pmu.num_counters, 0, 0); x86_pmu_format_group.attrs = x86_pmu.format_attrs; - x86_pmu_caps_group.attrs = x86_pmu.caps_attrs; + + if (x86_pmu.caps_attrs) { + struct attribute **tmp; + + tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs); + if (!WARN_ON(!tmp)) + x86_pmu_caps_group.attrs = tmp; + } if (x86_pmu.event_attrs) x86_pmu_events_group.attrs = x86_pmu.event_attrs; @@ -2224,6 +2228,25 @@ static struct attribute_group x86_pmu_attr_group = { .attrs = x86_pmu_attrs, }; +static ssize_t max_precise_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise()); +} + +static DEVICE_ATTR_RO(max_precise); + +static struct attribute *x86_pmu_caps_attrs[] = { + &dev_attr_max_precise.attr, + NULL +}; + +static struct attribute_group x86_pmu_caps_group = { + .name = "caps", + .attrs = x86_pmu_caps_attrs, +}; + static const struct attribute_group *x86_pmu_attr_groups[] = { &x86_pmu_attr_group, &x86_pmu_format_group, diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 8fa2abd9c8b6..829e89cfcee2 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3820,19 +3820,9 @@ static ssize_t pmu_name_show(struct device *cdev, static DEVICE_ATTR_RO(pmu_name); -static ssize_t max_precise_show(struct device *cdev, - struct device_attribute *attr, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise()); -} - -static DEVICE_ATTR_RO(max_precise); - static struct attribute *intel_pmu_caps_attrs[] = { - &dev_attr_pmu_name.attr, - &dev_attr_max_precise.attr, - NULL + &dev_attr_pmu_name.attr, + NULL }; static struct attribute *intel_pmu_attrs[] = { From 2c29461e273abaf149cf8220c3403e9d67dd8b61 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Tue, 29 Aug 2017 20:57:23 +0800 Subject: [PATCH 252/253] perf probe: Fix kprobe blacklist checking condition The commit 9aaf5a5f479b ("perf probe: Check kprobes blacklist when adding new events"), 'perf probe' supports checking the blacklist of the fuctions which can not be probed. But the checking condition is wrong, that the end_addr of the symbol which is the start_addr of the next symbol can't be included. Committer notes: IOW make it match its kernel counterpart in kernel/kprobes.c: bool within_kprobe_blacklist(unsigned long addr) Each entry have as its end address not its end address, but the first address _outside_ that symbol, which for related functions, is the first address of the next symbol, like these from kernel/trace/trace_probe.c: 0xffffffffbd198df0-0xffffffffbd198e40 print_type_u8 0xffffffffbd198e40-0xffffffffbd198e90 print_type_u16 0xffffffffbd198e90-0xffffffffbd198ee0 print_type_u32 0xffffffffbd198ee0-0xffffffffbd198f30 print_type_u64 0xffffffffbd198f30-0xffffffffbd198f80 print_type_s8 0xffffffffbd198f80-0xffffffffbd198fd0 print_type_s16 0xffffffffbd198fd0-0xffffffffbd199020 print_type_s32 0xffffffffbd199020-0xffffffffbd199070 print_type_s64 0xffffffffbd199070-0xffffffffbd1990c0 print_type_x8 0xffffffffbd1990c0-0xffffffffbd199110 print_type_x16 0xffffffffbd199110-0xffffffffbd199160 print_type_x32 0xffffffffbd199160-0xffffffffbd1991b0 print_type_x64 But not always: 0xffffffffbd1997b0-0xffffffffbd1997c0 fetch_kernel_stack_address (kernel/trace/trace_probe.c) 0xffffffffbd1c57f0-0xffffffffbd1c58b0 __context_tracking_enter (kernel/context_tracking.c) Signed-off-by: Li Bin Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengting@huawei.com Fixes: 9aaf5a5f479b ("perf probe: Check kprobes blacklist when adding new events") Link: http://lkml.kernel.org/r/1504011443-7269-1-git-send-email-huawei.libin@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index d7cd1142f4c6..b7aaf9b2294d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2395,7 +2395,7 @@ kprobe_blacklist__find_by_address(struct list_head *blacklist, struct kprobe_blacklist_node *node; list_for_each_entry(node, blacklist, list) { - if (node->start <= address && address <= node->end) + if (node->start <= address && address < node->end) return node; } From b2f7605076d6cdd68162c42c34caadafbbe4c69f Mon Sep 17 00:00:00 2001 From: Li Bin Date: Mon, 5 Jun 2017 08:34:09 +0800 Subject: [PATCH 253/253] perf symbols: Fix plt entry calculation for ARM and AARCH64 On x86, the plt header size is as same as the plt entry size, and can be identified from shdr's sh_entsize of the plt. But we can't assume that the sh_entsize of the plt shdr is always the plt entry size in all architecture, and the plt header size may be not as same as the plt entry size in some architecure. On ARM, the plt header size is 20 bytes and the plt entry size is 12 bytes (don't consider the FOUR_WORD_PLT case) that refer to the binutils implementation. The plt section is as follows: Disassembly of section .plt: 000004a0 <__cxa_finalize@plt-0x14>: 4a0: e52de004 push {lr} ; (str lr, [sp, #-4]!) 4a4: e59fe004 ldr lr, [pc, #4] ; 4b0 <_init+0x1c> 4a8: e08fe00e add lr, pc, lr 4ac: e5bef008 ldr pc, [lr, #8]! 4b0: 00008424 .word 0x00008424 000004b4 <__cxa_finalize@plt>: 4b4: e28fc600 add ip, pc, #0, 12 4b8: e28cca08 add ip, ip, #8, 20 ; 0x8000 4bc: e5bcf424 ldr pc, [ip, #1060]! ; 0x424 000004c0 : 4c0: e28fc600 add ip, pc, #0, 12 4c4: e28cca08 add ip, ip, #8, 20 ; 0x8000 4c8: e5bcf41c ldr pc, [ip, #1052]! ; 0x41c On AARCH64, the plt header size is 32 bytes and the plt entry size is 16 bytes. The plt section is as follows: Disassembly of section .plt: 0000000000000560 <__cxa_finalize@plt-0x20>: 560: a9bf7bf0 stp x16, x30, [sp,#-16]! 564: 90000090 adrp x16, 10000 <__FRAME_END__+0xf8a8> 568: f944be11 ldr x17, [x16,#2424] 56c: 9125e210 add x16, x16, #0x978 570: d61f0220 br x17 574: d503201f nop 578: d503201f nop 57c: d503201f nop 0000000000000580 <__cxa_finalize@plt>: 580: 90000090 adrp x16, 10000 <__FRAME_END__+0xf8a8> 584: f944c211 ldr x17, [x16,#2432] 588: 91260210 add x16, x16, #0x980 58c: d61f0220 br x17 0000000000000590 <__gmon_start__@plt>: 590: 90000090 adrp x16, 10000 <__FRAME_END__+0xf8a8> 594: f944c611 ldr x17, [x16,#2440] 598: 91262210 add x16, x16, #0x988 59c: d61f0220 br x17 NOTES: In addition to ARM and AARCH64, other architectures, such as s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa also need to consider this issue. Signed-off-by: Li Bin Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: David Tolnay Cc: Hanjun Guo Cc: Hemant Kumar Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengting@huawei.com Link: http://lkml.kernel.org/r/1496622849-21877-1-git-send-email-huawei.libin@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a70479061fce..5c39f420111e 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -259,7 +259,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * { uint32_t nr_rel_entries, idx; GElf_Sym sym; - u64 plt_offset; + u64 plt_offset, plt_header_size, plt_entry_size; GElf_Shdr shdr_plt; struct symbol *f; GElf_Shdr shdr_rel_plt, shdr_dynsym; @@ -326,6 +326,23 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; plt_offset = shdr_plt.sh_offset; + switch (ehdr.e_machine) { + case EM_ARM: + plt_header_size = 20; + plt_entry_size = 12; + break; + + case EM_AARCH64: + plt_header_size = 32; + plt_entry_size = 16; + break; + + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */ + plt_header_size = shdr_plt.sh_entsize; + plt_entry_size = shdr_plt.sh_entsize; + break; + } + plt_offset += plt_header_size; if (shdr_rel_plt.sh_type == SHT_RELA) { GElf_Rela pos_mem, *pos; @@ -335,7 +352,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * const char *elf_name = NULL; char *demangled = NULL; symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; gelf_getsym(syms, symidx, &sym); elf_name = elf_sym__name(&sym, symstrs); @@ -346,11 +362,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * "%s@plt", elf_name); free(demangled); - f = symbol__new(plt_offset, shdr_plt.sh_entsize, + f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, sympltname); if (!f) goto out_elf_end; + plt_offset += plt_entry_size; symbols__insert(&dso->symbols[map->type], f); ++nr; } @@ -361,7 +378,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * const char *elf_name = NULL; char *demangled = NULL; symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; gelf_getsym(syms, symidx, &sym); elf_name = elf_sym__name(&sym, symstrs); @@ -372,11 +388,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * "%s@plt", elf_name); free(demangled); - f = symbol__new(plt_offset, shdr_plt.sh_entsize, + f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, sympltname); if (!f) goto out_elf_end; + plt_offset += plt_entry_size; symbols__insert(&dso->symbols[map->type], f); ++nr; }