From 7eb709f29593aced51901cb53565477762800722 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Mar 2018 17:36:56 +0100 Subject: [PATCH 01/47] perf: Fix sibling iteration Mark noticed that the change to sibling_list changed some iteration semantics; because previously we used group_list as list entry, sibling events would always have an empty sibling_list. But because we now use sibling_list for both list head and list entry, siblings will report as having siblings. Fix this with a custom for_each_sibling_event() iterator. Fixes: 8343aae66167 ("perf/core: Remove perf_event::group_entry") Reported-by: Mark Rutland Suggested-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: vincent.weaver@maine.edu Cc: alexander.shishkin@linux.intel.com Cc: torvalds@linux-foundation.org Cc: alexey.budankov@linux.intel.com Cc: valery.cherepennikov@intel.com Cc: eranian@google.com Cc: acme@redhat.com Cc: linux-tip-commits@vger.kernel.org Cc: davidcc@google.com Cc: kan.liang@intel.com Cc: Dmitry.Prohorov@intel.com Cc: jolsa@redhat.com Link: https://lkml.kernel.org/r/20180315170129.GX4043@hirez.programming.kicks-ass.net --- arch/alpha/kernel/perf_event.c | 2 +- arch/arm/mach-imx/mmdc.c | 2 +- arch/arm/mm/cache-l2x0-pmu.c | 2 +- arch/mips/kernel/perf_event_mipsxx.c | 2 +- arch/powerpc/perf/core-book3s.c | 2 +- arch/powerpc/perf/core-fsl-emb.c | 2 +- arch/sparc/kernel/perf_event.c | 2 +- arch/x86/events/core.c | 2 +- arch/x86/events/intel/uncore.c | 2 +- drivers/bus/arm-cci.c | 2 +- drivers/bus/arm-ccn.c | 4 +-- drivers/perf/arm_dsu_pmu.c | 2 +- drivers/perf/arm_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_pmu.c | 2 +- drivers/perf/qcom_l2_pmu.c | 6 ++--- drivers/perf/qcom_l3_pmu.c | 2 +- drivers/perf/xgene_pmu.c | 4 +-- include/linux/perf_event.h | 4 +++ kernel/events/core.c | 34 +++++++++++------------- 19 files changed, 40 insertions(+), 40 deletions(-) diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 435864c24479..5613aa378a83 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -351,7 +351,7 @@ static int collect_events(struct perf_event *group, int max_count, evtype[n] = group->hw.event_base; current_idx[n++] = PMC_NO_INDEX; } - list_for_each_entry(pe, &group->sibling_list, sibling_list) { + for_each_sibling_event(pe, group) { if (!is_software_event(pe) && pe->state != PERF_EVENT_STATE_OFF) { if (n >= max_count) return -1; diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 27a9ca20933e..04b3bf71de94 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -269,7 +269,7 @@ static bool mmdc_pmu_group_is_valid(struct perf_event *event) return false; } - list_for_each_entry(sibling, &leader->sibling_list, sibling_list) { + for_each_sibling_event(sibling, leader) { if (!mmdc_pmu_group_event_is_valid(sibling, pmu, &counter_mask)) return false; } diff --git a/arch/arm/mm/cache-l2x0-pmu.c b/arch/arm/mm/cache-l2x0-pmu.c index 3a89ea4c2b57..afe5b4c7b164 100644 --- a/arch/arm/mm/cache-l2x0-pmu.c +++ b/arch/arm/mm/cache-l2x0-pmu.c @@ -293,7 +293,7 @@ static bool l2x0_pmu_group_is_valid(struct perf_event *event) else if (!is_software_event(leader)) return false; - list_for_each_entry(sibling, &leader->sibling_list, sibling_list) { + for_each_sibling_event(sibling, leader) { if (sibling->pmu == pmu) num_hw++; else if (!is_software_event(sibling)) diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 46097ff3208b..ee73550f0b9a 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -711,7 +711,7 @@ static int validate_group(struct perf_event *event) if (mipsxx_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0) return -EINVAL; - list_for_each_entry(sibling, &leader->sibling_list, sibling_list) { + for_each_sibling_event(sibling, leader) { if (mipsxx_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0) return -EINVAL; } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 7c1f66050433..f8908ea4ea73 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1426,7 +1426,7 @@ static int collect_events(struct perf_event *group, int max_count, flags[n] = group->hw.event_base; events[n++] = group->hw.config; } - list_for_each_entry(event, &group->sibling_list, sibling_list) { + for_each_sibling_event(event, group) { if (event->pmu->task_ctx_nr == perf_hw_context && event->state != PERF_EVENT_STATE_OFF) { if (n >= max_count) diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index 94c2e63662c6..85f1d18e5fd3 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -277,7 +277,7 @@ static int collect_events(struct perf_event *group, int max_count, ctrs[n] = group; n++; } - list_for_each_entry(event, &group->sibling_list, sibling_list) { + for_each_sibling_event(event, group) { if (!is_software_event(event) && event->state != PERF_EVENT_STATE_OFF) { if (n >= max_count) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index a0a86d369119..d3149baaa33c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1342,7 +1342,7 @@ static int collect_events(struct perf_event *group, int max_count, events[n] = group->hw.event_base; current_idx[n++] = PIC_NO_INDEX; } - list_for_each_entry(event, &group->sibling_list, sibling_list) { + for_each_sibling_event(event, group) { if (!is_software_event(event) && event->state != PERF_EVENT_STATE_OFF) { if (n >= max_count) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 77a4125b6b1f..bfc8f43909c1 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -990,7 +990,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, if (!dogrp) return n; - list_for_each_entry(event, &leader->sibling_list, sibling_list) { + for_each_sibling_event(event, leader) { if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF) continue; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 9e374cd22ad2..a7956fc7ca1d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -354,7 +354,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, if (!dogrp) return n; - list_for_each_entry(event, &leader->sibling_list, sibling_list) { + for_each_sibling_event(event, leader) { if (!is_box_event(box, event) || event->state <= PERF_EVENT_STATE_OFF) continue; diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index c98435bdb64f..c4c0c8560cce 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -1311,7 +1311,7 @@ validate_group(struct perf_event *event) if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; - list_for_each_entry(sibling, &leader->sibling_list, sibling_list) { + for_each_sibling_event(sibling, leader) { if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; } diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 1c310a4be000..65b7e4042ece 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -846,11 +846,11 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) !is_software_event(event->group_leader)) return -EINVAL; - list_for_each_entry(sibling, &event->group_leader->sibling_list, - sibling_list) + for_each_sibling_event(sibling, event->group_leader) { if (sibling->pmu != event->pmu && !is_software_event(sibling)) return -EINVAL; + } return 0; } diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index 660680d78147..660cb8ac886a 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -536,7 +536,7 @@ static bool dsu_pmu_validate_group(struct perf_event *event) memset(fake_hw.used_mask, 0, sizeof(fake_hw.used_mask)); if (!dsu_pmu_validate_event(event->pmu, &fake_hw, leader)) return false; - list_for_each_entry(sibling, &leader->sibling_list, sibling_list) { + for_each_sibling_event(sibling, leader) { if (!dsu_pmu_validate_event(event->pmu, &fake_hw, sibling)) return false; } diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 628d7a7b9526..344e2083e941 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -311,7 +311,7 @@ validate_group(struct perf_event *event) if (!validate_event(event->pmu, &fake_pmu, leader)) return -EINVAL; - list_for_each_entry(sibling, &leader->sibling_list, sibling_list) { + for_each_sibling_event(sibling, leader) { if (!validate_event(event->pmu, &fake_pmu, sibling)) return -EINVAL; } diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index e3356087fd76..44df61397a38 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -82,7 +82,7 @@ static bool hisi_validate_event_group(struct perf_event *event) counters++; } - list_for_each_entry(sibling, &event->group_leader->sibling_list, sibling_list) { + for_each_sibling_event(sibling, event->group_leader) { if (is_software_event(sibling)) continue; if (sibling->pmu != event->pmu) diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 5e535a718965..1a7889f63c9a 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -534,8 +534,7 @@ static int l2_cache_event_init(struct perf_event *event) return -EINVAL; } - list_for_each_entry(sibling, &event->group_leader->sibling_list, - sibling_list) + for_each_sibling_event(sibling, event->group_leader) { if (sibling->pmu != event->pmu && !is_software_event(sibling)) { dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, @@ -571,8 +570,7 @@ static int l2_cache_event_init(struct perf_event *event) return -EINVAL; } - list_for_each_entry(sibling, &event->group_leader->sibling_list, - sibling_list) { + for_each_sibling_event(sibling, event->group_leader) { if ((sibling != event) && !is_software_event(sibling) && (L2_EVT_GROUP(sibling->attr.config) == diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index 5dedf4b1a552..2dc63d61f2ea 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -468,7 +468,7 @@ static bool qcom_l3_cache__validate_event_group(struct perf_event *event) counters = event_num_counters(event); counters += event_num_counters(leader); - list_for_each_entry(sibling, &leader->sibling_list, sibling_list) { + for_each_sibling_event(sibling, leader) { if (is_software_event(sibling)) continue; if (sibling->pmu != event->pmu) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index f1f4a56cab5e..6bdb1dad805f 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -949,11 +949,11 @@ static int xgene_perf_event_init(struct perf_event *event) !is_software_event(event->group_leader)) return -EINVAL; - list_for_each_entry(sibling, &event->group_leader->sibling_list, - sibling_list) + for_each_sibling_event(sibling, event->group_leader) { if (sibling->pmu != event->pmu && !is_software_event(sibling)) return -EINVAL; + } return 0; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2bb200e1bbea..ff39ab011376 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -536,6 +536,10 @@ struct pmu_event_list { struct list_head list; }; +#define for_each_sibling_event(sibling, event) \ + if ((event)->group_leader == (event)) \ + list_for_each_entry((sibling), &(event)->sibling_list, sibling_list) + /** * struct perf_event - performance event kernel representation: */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 3b4c7792a6ac..4d7a460d6669 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -643,7 +643,7 @@ static void perf_event_update_sibling_time(struct perf_event *leader) { struct perf_event *sibling; - list_for_each_entry(sibling, &leader->sibling_list, sibling_list) + for_each_sibling_event(sibling, leader) perf_event_update_time(sibling); } @@ -1828,7 +1828,7 @@ static void perf_group_attach(struct perf_event *event) perf_event__header_size(group_leader); - list_for_each_entry(pos, &group_leader->sibling_list, sibling_list) + for_each_sibling_event(pos, group_leader) perf_event__header_size(pos); } @@ -1928,7 +1928,7 @@ static void perf_group_detach(struct perf_event *event) out: perf_event__header_size(event->group_leader); - list_for_each_entry(tmp, &event->group_leader->sibling_list, sibling_list) + for_each_sibling_event(tmp, event->group_leader) perf_event__header_size(tmp); } @@ -1951,13 +1951,13 @@ static inline int __pmu_filter_match(struct perf_event *event) */ static inline int pmu_filter_match(struct perf_event *event) { - struct perf_event *child; + struct perf_event *sibling; if (!__pmu_filter_match(event)) return 0; - list_for_each_entry(child, &event->sibling_list, sibling_list) { - if (!__pmu_filter_match(child)) + for_each_sibling_event(sibling, event) { + if (!__pmu_filter_match(sibling)) return 0; } @@ -2031,7 +2031,7 @@ group_sched_out(struct perf_event *group_event, /* * Schedule out siblings (if any): */ - list_for_each_entry(event, &group_event->sibling_list, sibling_list) + for_each_sibling_event(event, group_event) event_sched_out(event, cpuctx, ctx); perf_pmu_enable(ctx->pmu); @@ -2310,7 +2310,7 @@ group_sched_in(struct perf_event *group_event, /* * Schedule in siblings as one group (if any): */ - list_for_each_entry(event, &group_event->sibling_list, sibling_list) { + for_each_sibling_event(event, group_event) { if (event_sched_in(event, cpuctx, ctx)) { partial_group = event; goto group_error; @@ -2326,7 +2326,7 @@ group_sched_in(struct perf_event *group_event, * partial group before returning: * The events up to the failed event are scheduled out normally. */ - list_for_each_entry(event, &group_event->sibling_list, sibling_list) { + for_each_sibling_event(event, group_event) { if (event == partial_group) break; @@ -3863,7 +3863,7 @@ static void __perf_event_read(void *info) pmu->read(event); - list_for_each_entry(sub, &event->sibling_list, sibling_list) { + for_each_sibling_event(sub, event) { if (sub->state == PERF_EVENT_STATE_ACTIVE) { /* * Use sibling's PMU rather than @event's since @@ -4711,7 +4711,7 @@ static int __perf_read_group_add(struct perf_event *leader, if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); - list_for_each_entry(sub, &leader->sibling_list, sibling_list) { + for_each_sibling_event(sub, leader) { values[n++] += perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); @@ -4905,7 +4905,7 @@ static void perf_event_for_each(struct perf_event *event, event = event->group_leader; perf_event_for_each_child(event, func); - list_for_each_entry(sibling, &event->sibling_list, sibling_list) + for_each_sibling_event(sibling, event) perf_event_for_each_child(sibling, func); } @@ -6077,7 +6077,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, __output_copy(handle, values, n * sizeof(u64)); - list_for_each_entry(sub, &leader->sibling_list, sibling_list) { + for_each_sibling_event(sub, leader) { n = 0; if ((sub != event) && @@ -10662,8 +10662,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_remove_from_context(group_leader, 0); put_ctx(gctx); - list_for_each_entry(sibling, &group_leader->sibling_list, - sibling_list) { + for_each_sibling_event(sibling, group_leader) { perf_remove_from_context(sibling, 0); put_ctx(gctx); } @@ -10684,8 +10683,7 @@ SYSCALL_DEFINE5(perf_event_open, * By installing siblings first we NO-OP because they're not * reachable through the group lists. */ - list_for_each_entry(sibling, &group_leader->sibling_list, - sibling_list) { + for_each_sibling_event(sibling, group_leader) { perf_event__state_init(sibling); perf_install_in_context(ctx, sibling, sibling->cpu); get_ctx(ctx); @@ -11324,7 +11322,7 @@ static int inherit_group(struct perf_event *parent_event, * case inherit_event() will create individual events, similar to what * perf_group_detach() would do anyway. */ - list_for_each_entry(sub, &parent_event->sibling_list, sibling_list) { + for_each_sibling_event(sub, parent_event) { child_ctr = inherit_event(sub, parent, parent_ctx, child, leader, child_ctx); if (IS_ERR(child_ctr)) From bbb68468641547d56c83012670bcaf77f3dacd64 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 16 Mar 2018 12:51:40 +0000 Subject: [PATCH 02/47] perf/core: Clear sibling list of detached events When perf_group_dettach() is called on a group leader, it updates each sibling's group_leader field to point to that sibling, effectively upgrading each siblnig to a group leader. After perf_group_detach has completed, the caller may free the leader event. We only remove siblings from the group leader's sibling_list when the leader has a non-empty group_node. This was fine prior to commit: 8343aae66167df67 ("perf/core: Remove perf_event::group_entry") ... as the sibling's sibling_list would be empty. However, now that we use the sibling_list field as both the list head and the list entry, this leaves each sibling with a non-empty sibling list, including the stale leader event. If perf_group_detach() is subsequently called on a sibling, it will appear to be a group leader, and we'll walk the sibling_list, potentially dereferencing these stale events. In 0day testing, this has been observed to result in kernel panics. Let's avoid this by always removing siblings from the sibling list when we promote them to leaders. Fixes: 8343aae66167df67 ("perf/core: Remove perf_event::group_entry") Signed-off-by: Mark Rutland Signed-off-by: Thomas Gleixner Cc: vincent.weaver@maine.edu Cc: Peter Zijlstra Cc: torvalds@linux-foundation.org Cc: Alexey Budankov Cc: valery.cherepennikov@intel.com Cc: linux-tip-commits@vger.kernel.org Cc: eranian@google.com Cc: acme@redhat.com Cc: alexander.shishkin@linux.intel.com Cc: davidcc@google.com Cc: kan.liang@intel.com Cc: Dmitry.Prohorov@intel.com Cc: Jiri Olsa Link: https://lkml.kernel.org/r/20180316131741.3svgr64yibc6vsid@lakrids.cambridge.arm.com --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4d7a460d6669..2776a660db15 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1906,12 +1906,12 @@ static void perf_group_detach(struct perf_event *event) list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) { sibling->group_leader = sibling; + list_del_init(&sibling->sibling_list); /* Inherit group flags from the previous leader */ sibling->group_caps = event->group_caps; if (!RB_EMPTY_NODE(&event->group_node)) { - list_del_init(&sibling->sibling_list); add_event_to_groups(sibling, event->ctx); if (sibling->state == PERF_EVENT_STATE_ACTIVE) { From e725920cdb1c79fdc71f2f164f59be8c411cad68 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Mar 2018 11:14:34 +0100 Subject: [PATCH 03/47] perf env: Free memory nodes data Forgot to free env's memory nodes, adding needed code to perf_env__exit. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180309101442.9224-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6d311868d850..4c842762e3f2 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -32,6 +32,10 @@ void perf_env__exit(struct perf_env *env) for (i = 0; i < env->caches_cnt; i++) cpu_cache_level__free(&env->caches[i]); zfree(&env->caches); + + for (i = 0; i < env->nr_memory_nodes; i++) + free(env->memory_nodes[i].set); + zfree(&env->memory_nodes); } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) From 4acf6142de3fbc4fc9cc8da0a1aec073f05b724f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Mar 2018 11:14:35 +0100 Subject: [PATCH 04/47] perf tools: Add mem2node object Adding mem2node object to allow the easy lookup of the node for the physical address. It has following interface: int mem2node__init(struct mem2node *map, struct perf_env *env); void mem2node__exit(struct mem2node *map); int mem2node__node(struct mem2node *map, u64 addr); The mem2node__toolsinit initialize object from the perf data file MEM_TOPOLOGY feature data. Following calls to mem2node__node will return node number for given physical address. The mem2node__exit function frees the object. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180309101442.9224-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/mem2node.c | 134 +++++++++++++++++++++++++++++++++++++ tools/perf/util/mem2node.h | 19 ++++++ 3 files changed, 154 insertions(+) create mode 100644 tools/perf/util/mem2node.c create mode 100644 tools/perf/util/mem2node.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index ea0a452550b0..8052373bcd6a 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -106,6 +106,7 @@ libperf-y += units.o libperf-y += time-utils.o libperf-y += expr-bison.o libperf-y += branch.o +libperf-y += mem2node.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c new file mode 100644 index 000000000000..c6fd81c02586 --- /dev/null +++ b/tools/perf/util/mem2node.c @@ -0,0 +1,134 @@ +#include +#include +#include +#include "mem2node.h" +#include "util.h" + +struct phys_entry { + struct rb_node rb_node; + u64 start; + u64 end; + u64 node; +}; + +static void phys_entry__insert(struct phys_entry *entry, struct rb_root *root) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct phys_entry *e; + + while (*p != NULL) { + parent = *p; + e = rb_entry(parent, struct phys_entry, rb_node); + + if (entry->start < e->start) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&entry->rb_node, parent, p); + rb_insert_color(&entry->rb_node, root); +} + +static void +phys_entry__init(struct phys_entry *entry, u64 start, u64 bsize, u64 node) +{ + entry->start = start; + entry->end = start + bsize; + entry->node = node; + RB_CLEAR_NODE(&entry->rb_node); +} + +int mem2node__init(struct mem2node *map, struct perf_env *env) +{ + struct memory_node *n, *nodes = &env->memory_nodes[0]; + struct phys_entry *entries, *tmp_entries; + u64 bsize = env->memory_bsize; + int i, j = 0, max = 0; + + memset(map, 0x0, sizeof(*map)); + map->root = RB_ROOT; + + for (i = 0; i < env->nr_memory_nodes; i++) { + n = &nodes[i]; + max += bitmap_weight(n->set, n->size); + } + + entries = zalloc(sizeof(*entries) * max); + if (!entries) + return -ENOMEM; + + for (i = 0; i < env->nr_memory_nodes; i++) { + u64 bit; + + n = &nodes[i]; + + for (bit = 0; bit < n->size; bit++) { + u64 start; + + if (!test_bit(bit, n->set)) + continue; + + start = bit * bsize; + + /* + * Merge nearby areas, we walk in order + * through the bitmap, so no need to sort. + */ + if (j > 0) { + struct phys_entry *prev = &entries[j - 1]; + + if ((prev->end == start) && + (prev->node == n->node)) { + prev->end += bsize; + continue; + } + } + + phys_entry__init(&entries[j++], start, bsize, n->node); + } + } + + /* Cut unused entries, due to merging. */ + tmp_entries = realloc(entries, sizeof(*entries) * j); + if (tmp_entries) + entries = tmp_entries; + + for (i = 0; i < j; i++) { + pr_debug("mem2node %03" PRIu64 " [0x%016" PRIx64 "-0x%016" PRIx64 "]\n", + entries[i].node, entries[i].start, entries[i].end); + + phys_entry__insert(&entries[i], &map->root); + } + + map->entries = entries; + return 0; +} + +void mem2node__exit(struct mem2node *map) +{ + zfree(&map->entries); +} + +int mem2node__node(struct mem2node *map, u64 addr) +{ + struct rb_node **p, *parent = NULL; + struct phys_entry *entry; + + p = &map->root.rb_node; + while (*p != NULL) { + parent = *p; + entry = rb_entry(parent, struct phys_entry, rb_node); + if (addr < entry->start) + p = &(*p)->rb_left; + else if (addr >= entry->end) + p = &(*p)->rb_right; + else + goto out; + } + + entry = NULL; +out: + return entry ? (int) entry->node : -1; +} diff --git a/tools/perf/util/mem2node.h b/tools/perf/util/mem2node.h new file mode 100644 index 000000000000..59c4752a2181 --- /dev/null +++ b/tools/perf/util/mem2node.h @@ -0,0 +1,19 @@ +#ifndef __MEM2NODE_H +#define __MEM2NODE_H + +#include +#include "env.h" + +struct phys_entry; + +struct mem2node { + struct rb_root root; + struct phys_entry *entries; + int cnt; +}; + +int mem2node__init(struct mem2node *map, struct perf_env *env); +void mem2node__exit(struct mem2node *map); +int mem2node__node(struct mem2node *map, u64 addr); + +#endif /* __MEM2NODE_H */ From 8185850ad603acfc66f5b3d284955809dffa5d2c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Mar 2018 11:14:36 +0100 Subject: [PATCH 05/47] perf tests: Add mem2node object test Adding mem2node object automated test. The test prepares few artificial nodes - memory maps and verifies the mem2node object returns proper node values to given addresses. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180309101442.9224-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 4 ++ tools/perf/tests/mem2node.c | 75 +++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 4 files changed, 81 insertions(+) create mode 100644 tools/perf/tests/mem2node.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 62ca0174d5e1..6c108fa79ae3 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -48,6 +48,7 @@ perf-y += bitmap.o perf-y += perf-hooks.o perf-y += clang.o perf-y += unit_number__scnprintf.o +perf-y += mem2node.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 38bf109ce106..625f5a6772af 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -274,6 +274,10 @@ static struct test generic_tests[] = { .desc = "unit_number__scnprintf", .func = test__unit_number__scnprint, }, + { + .desc = "mem2node", + .func = test__mem2node, + }, { .func = NULL, }, diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c new file mode 100644 index 000000000000..0c3c87f86e03 --- /dev/null +++ b/tools/perf/tests/mem2node.c @@ -0,0 +1,75 @@ +#include +#include +#include "cpumap.h" +#include "mem2node.h" +#include "tests.h" + +static struct node { + int node; + const char *map; +} test_nodes[] = { + { .node = 0, .map = "0" }, + { .node = 1, .map = "1-2" }, + { .node = 3, .map = "5-7,9" }, +}; + +#define T TEST_ASSERT_VAL + +static unsigned long *get_bitmap(const char *str, int nbits) +{ + struct cpu_map *map = cpu_map__new(str); + unsigned long *bm = NULL; + int i; + + bm = bitmap_alloc(nbits); + + if (map && bm) { + bitmap_zero(bm, nbits); + + for (i = 0; i < map->nr; i++) { + set_bit(map->map[i], bm); + } + } + + if (map) + cpu_map__put(map); + else + free(bm); + + return bm && map ? bm : NULL; +} + +int test__mem2node(struct test *t __maybe_unused, int subtest __maybe_unused) +{ + struct mem2node map; + struct memory_node nodes[3]; + struct perf_env env = { + .memory_nodes = (struct memory_node *) &nodes[0], + .nr_memory_nodes = ARRAY_SIZE(nodes), + .memory_bsize = 0x100, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(nodes); i++) { + nodes[i].node = test_nodes[i].node; + nodes[i].size = 10; + + T("failed: alloc bitmap", + (nodes[i].set = get_bitmap(test_nodes[i].map, 10))); + } + + T("failed: mem2node__init", !mem2node__init(&map, &env)); + T("failed: mem2node__node", 0 == mem2node__node(&map, 0x50)); + T("failed: mem2node__node", 1 == mem2node__node(&map, 0x100)); + T("failed: mem2node__node", 1 == mem2node__node(&map, 0x250)); + T("failed: mem2node__node", 3 == mem2node__node(&map, 0x500)); + T("failed: mem2node__node", 3 == mem2node__node(&map, 0x650)); + T("failed: mem2node__node", -1 == mem2node__node(&map, 0x450)); + T("failed: mem2node__node", -1 == mem2node__node(&map, 0x1050)); + + for (i = 0; i < ARRAY_SIZE(nodes); i++) + free(nodes[i].set); + + mem2node__exit(&map); + return 0; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 9f51edac44ae..a9760e790563 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -103,6 +103,7 @@ int test__clang(struct test *test, int subtest); const char *test__clang_subtest_get_desc(int subtest); int test__clang_subtest_get_nr(void); int test__unit_number__scnprint(struct test *test, int subtest); +int test__mem2node(struct test *t, int subtest); bool test__bp_signal_is_supported(void); From 8fab7843a15078814764e01c303d175c92b500c1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Mar 2018 11:14:37 +0100 Subject: [PATCH 06/47] perf c2c record: Record physical addresses in samples We are going to display NUMA node information in following patches. For this we need to have physical address data in the sample. Adding --phys-data as a default option for perf c2c record. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180309101442.9224-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-c2c.txt | 2 +- tools/perf/builtin-c2c.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 822414235170..095aebdc5bb7 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -116,7 +116,7 @@ and calls standard perf record command. Following perf record options are configured by default: (check perf record man page for details) - -W,-d,--sample-cpu + -W,-d,--phys-data,--sample-cpu Unless specified otherwise with '-e' option, following events are monitored by default: diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 98d243fa0c06..95765a1db903 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2704,7 +2704,7 @@ static int perf_c2c__record(int argc, const char **argv) argc = parse_options(argc, argv, options, record_mem_usage, PARSE_OPT_KEEP_UNKNOWN); - rec_argc = argc + 10; /* max number of arguments */ + rec_argc = argc + 11; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; @@ -2720,6 +2720,7 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; + rec_argv[i++] = "--phys-data"; rec_argv[i++] = "--sample-cpu"; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { From 3773138828b38f3f1364ef318cd876b16182388a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Mar 2018 11:14:38 +0100 Subject: [PATCH 07/47] perf c2c report: Make calc_width work with struct c2c_hist_entry We are going to calculate tje column width based on the struct c2c_hist_entry data, so making calc_width to work with struct c2c_hist_entry. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180309101442.9224-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 95765a1db903..43ce55550c9d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1839,20 +1839,24 @@ static inline int valid_hitm_or_store(struct hist_entry *he) return has_hitm || c2c_he->stats.store; } -static void calc_width(struct hist_entry *he) +static void calc_width(struct c2c_hist_entry *c2c_he) { struct c2c_hists *c2c_hists; - c2c_hists = container_of(he->hists, struct c2c_hists, hists); - hists__calc_col_len(&c2c_hists->hists, he); + c2c_hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); + hists__calc_col_len(&c2c_hists->hists, &c2c_he->he); } static int filter_cb(struct hist_entry *he) { + struct c2c_hist_entry *c2c_he; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + if (c2c.show_src && !he->srcline) he->srcline = hist_entry__get_srcline(he); - calc_width(he); + calc_width(c2c_he); if (!valid_hitm_or_store(he)) he->filtered = HIST_FILTER__C2C; @@ -1869,7 +1873,7 @@ static int resort_cl_cb(struct hist_entry *he) c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; - calc_width(he); + calc_width(c2c_he); if (display && c2c_hists) { static unsigned int idx; From bc229c21f2c79ef0f7b30d3a2fce8c2886ffa6c7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Mar 2018 11:14:39 +0100 Subject: [PATCH 08/47] perf c2c report: Call calc_width() only for displayed entries There's no need to calculate column widths for entries that are not going to be displayed. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180309101442.9224-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 43ce55550c9d..821112e8ba97 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1873,12 +1873,11 @@ static int resort_cl_cb(struct hist_entry *he) c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; - calc_width(c2c_he); - if (display && c2c_hists) { static unsigned int idx; c2c_he->cacheline_idx = idx++; + calc_width(c2c_he); c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort); From 7f834c2e84bbcf94a1ed65a2ae648129e1901370 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Mar 2018 11:14:40 +0100 Subject: [PATCH 09/47] perf c2c report: Display node for cacheline address Adding the NUMA node info for the data cacheline. Adding the new column to both "Shared Data Cache Line Table" and "Shared Cache Line Distribution Pareto". Note the new 'Node' column next to the 'Cacheline'. $ perf c2c report --stdio ================================================= Shared Data Cache Line Table ================================================= # # Total Tot ----- LLC Load Hitm ----- # Index Cacheline Node records Hitm Total Lcl Rmt # ..... .................. .... ....... ....... ....... ....... ....... # 0 0x7f0830100000 0 84 10.53% 8 8 0 1 0xffff922a93154200 0 3 2.63% 2 2 0 2 0xffff922a93154500 0 4 2.63% 2 2 0 ... Note the new 'Node' column next to the 'Offset'. ================================================= Shared Cache Line Distribution Pareto ================================================= # # ----- HITM ----- -- Store Refs -- Data address # Num Rmt Lcl L1 Hit L1 Miss Offset Node Pid # ..... ....... ....... ....... ....... .................. .... ....... # ------------------------------------------------------------- 0 0 8 32 2 0x7f0830100000 ------------------------------------------------------------- 0.00% 75.00% 21.88% 0.00% 0x18 0 1791 0.00% 12.50% 37.50% 0.00% 0x18 0 1791 0.00% 0.00% 34.38% 0.00% 0x18 0 1791 Using the mem2node object to get the NUMA node data. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180309101442.9224-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 119 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 114 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 821112e8ba97..45c047fdd7ac 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -32,6 +32,7 @@ #include "evsel.h" #include "ui/browsers/hists.h" #include "thread.h" +#include "mem2node.h" struct c2c_hists { struct hists hists; @@ -49,6 +50,7 @@ struct c2c_hist_entry { struct c2c_hists *hists; struct c2c_stats stats; unsigned long *cpuset; + unsigned long *nodeset; struct c2c_stats *node_stats; unsigned int cacheline_idx; @@ -59,6 +61,11 @@ struct c2c_hist_entry { * because of its callchain dynamic entry */ struct hist_entry he; + + unsigned long paddr; + unsigned long paddr_cnt; + bool paddr_zero; + char *nodestr; }; static char const *coalesce_default = "pid,iaddr"; @@ -66,6 +73,7 @@ static char const *coalesce_default = "pid,iaddr"; struct perf_c2c { struct perf_tool tool; struct c2c_hists hists; + struct mem2node mem2node; unsigned long **nodes; int nodes_cnt; @@ -123,6 +131,10 @@ static void *c2c_he_zalloc(size_t size) if (!c2c_he->cpuset) return NULL; + c2c_he->nodeset = bitmap_alloc(c2c.nodes_cnt); + if (!c2c_he->nodeset) + return NULL; + c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats)); if (!c2c_he->node_stats) return NULL; @@ -145,6 +157,8 @@ static void c2c_he_free(void *he) } free(c2c_he->cpuset); + free(c2c_he->nodeset); + free(c2c_he->nodestr); free(c2c_he->node_stats); free(c2c_he); } @@ -194,6 +208,28 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he, set_bit(sample->cpu, c2c_he->cpuset); } +static void c2c_he__set_node(struct c2c_hist_entry *c2c_he, + struct perf_sample *sample) +{ + int node; + + if (!sample->phys_addr) { + c2c_he->paddr_zero = true; + return; + } + + node = mem2node__node(&c2c.mem2node, sample->phys_addr); + if (WARN_ONCE(node < 0, "WARNING: failed to find node\n")) + return; + + set_bit(node, c2c_he->nodeset); + + if (c2c_he->paddr != sample->phys_addr) { + c2c_he->paddr_cnt++; + c2c_he->paddr = sample->phys_addr; + } +} + static void compute_stats(struct c2c_hist_entry *c2c_he, struct c2c_stats *stats, u64 weight) @@ -257,6 +293,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, c2c_add_stats(&c2c_hists->stats, &stats); c2c_he__set_cpu(c2c_he, sample); + c2c_he__set_node(c2c_he, sample); hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); @@ -293,6 +330,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, compute_stats(c2c_he, &stats, sample->weight); c2c_he__set_cpu(c2c_he, sample); + c2c_he__set_node(c2c_he, sample); hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); @@ -455,6 +493,20 @@ static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr)); } +static int +dcacheline_node_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + + c2c_he = container_of(he, struct c2c_hist_entry, he); + if (WARN_ON_ONCE(!c2c_he->nodestr)) + return 0; + + return scnprintf(hpp->buf, hpp->size, "%*s", width, c2c_he->nodestr); +} + static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { @@ -1207,6 +1259,14 @@ static struct c2c_dimension dim_dcacheline = { .width = 18, }; +static struct c2c_dimension dim_dcacheline_node = { + .header = HEADER_LOW("Node"), + .name = "dcacheline_node", + .cmp = empty_cmp, + .entry = dcacheline_node_entry, + .width = 4, +}; + static struct c2c_header header_offset_tui = HEADER_LOW("Off"); static struct c2c_dimension dim_offset = { @@ -1217,6 +1277,14 @@ static struct c2c_dimension dim_offset = { .width = 18, }; +static struct c2c_dimension dim_offset_node = { + .header = HEADER_LOW("Node"), + .name = "offset_node", + .cmp = empty_cmp, + .entry = dcacheline_node_entry, + .width = 4, +}; + static struct c2c_dimension dim_iaddr = { .header = HEADER_LOW("Code address"), .name = "iaddr", @@ -1536,7 +1604,9 @@ static struct c2c_dimension dim_dcacheline_num_empty = { static struct c2c_dimension *dimensions[] = { &dim_dcacheline, + &dim_dcacheline_node, &dim_offset, + &dim_offset_node, &dim_iaddr, &dim_tot_hitm, &dim_lcl_hitm, @@ -1839,12 +1909,44 @@ static inline int valid_hitm_or_store(struct hist_entry *he) return has_hitm || c2c_he->stats.store; } +static void set_node_width(struct c2c_hist_entry *c2c_he, int len) +{ + struct c2c_dimension *dim; + + dim = &c2c.hists == c2c_he->hists ? + &dim_dcacheline_node : &dim_offset_node; + + if (len > dim->width) + dim->width = len; +} + +static int set_nodestr(struct c2c_hist_entry *c2c_he) +{ + char buf[30]; + int len; + + if (c2c_he->nodestr) + return 0; + + if (bitmap_weight(c2c_he->nodeset, c2c.nodes_cnt)) { + len = bitmap_scnprintf(c2c_he->nodeset, c2c.nodes_cnt, + buf, sizeof(buf)); + } else { + len = scnprintf(buf, sizeof(buf), "N/A"); + } + + set_node_width(c2c_he, len); + c2c_he->nodestr = strdup(buf); + return c2c_he->nodestr ? 0 : -ENOMEM; +} + static void calc_width(struct c2c_hist_entry *c2c_he) { struct c2c_hists *c2c_hists; c2c_hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); hists__calc_col_len(&c2c_hists->hists, &c2c_he->he); + set_nodestr(c2c_he); } static int filter_cb(struct hist_entry *he) @@ -2474,7 +2576,7 @@ static int build_cl_output(char *cl_sort, bool no_source) "percent_lcl_hitm," "percent_stores_l1hit," "percent_stores_l1miss," - "offset,", + "offset,offset_node,", add_pid ? "pid," : "", add_tid ? "tid," : "", add_iaddr ? "iaddr," : "", @@ -2603,17 +2705,21 @@ static int perf_c2c__report(int argc, const char **argv) goto out; } - err = setup_callchain(session->evlist); + err = mem2node__init(&c2c.mem2node, &session->header.env); if (err) goto out_session; + err = setup_callchain(session->evlist); + if (err) + goto out_mem2node; + if (symbol__init(&session->header.env) < 0) - goto out_session; + goto out_mem2node; /* No pipe support at the moment. */ if (perf_data__is_pipe(session->data)) { pr_debug("No pipe support at the moment.\n"); - goto out_session; + goto out_mem2node; } if (c2c.use_stdio) @@ -2626,12 +2732,13 @@ static int perf_c2c__report(int argc, const char **argv) err = perf_session__process_events(session); if (err) { pr_err("failed to process sample\n"); - goto out_session; + goto out_mem2node; } c2c_hists__reinit(&c2c.hists, "cl_idx," "dcacheline," + "dcacheline_node," "tot_recs," "percent_hitm," "tot_hitm,lcl_hitm,rmt_hitm," @@ -2657,6 +2764,8 @@ static int perf_c2c__report(int argc, const char **argv) perf_c2c_display(session); +out_mem2node: + mem2node__exit(&c2c.mem2node); out_session: perf_session__delete(session); out: From d0802b1ee2c8b95e960f46fa14fe0fee562cb79a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Mar 2018 11:14:41 +0100 Subject: [PATCH 10/47] perf c2c report: Add span header over cacheline data Forcing the NUMA node output to be grouped with the "Cacheline" column in both "Shared Data Cache Line Table" and "Shared Cache Line Distribution Pareto" tables. Before: # Total Tot ----- LLC Load Hitm ----- # Index Cacheline Node records Hitm Total Lcl Rmt # ..... .................. .... ....... ....... ....... ....... ....... # 0 0x7f0830100000 0 84 10.53% 8 8 0 1 0xffff922a93154200 0 3 2.63% 2 2 0 2 0xffff922a93154500 0 4 2.63% 2 2 0 After: # ------- Cacheline ------ Total Tot ----- LLC Load Hitm ----- # Index Address Node records Hitm Total Lcl Rmt # ..... .................. .... ....... ....... ....... ....... ....... # 0 0x7f0830100000 0 84 10.53% 8 8 0 1 0xffff922a93154200 0 3 2.63% 2 2 0 2 0xffff922a93154500 0 4 2.63% 2 2 0 Before: # ----- HITM ----- -- Store Refs -- Data address # Num Rmt Lcl L1 Hit L1 Miss Offset Node Pid # ..... ....... ....... ....... ....... .................. .... ....... # ------------------------------------------------------------- 0 0 8 32 2 0x7f0830100000 ------------------------------------------------------------- 0.00% 75.00% 21.88% 0.00% 0x18 0 1791 0.00% 12.50% 37.50% 0.00% 0x18 0 1791 0.00% 0.00% 34.38% 0.00% 0x18 0 1791 After: # ----- HITM ----- -- Store Refs -- ----- Data address ----- # Num Rmt Lcl L1 Hit L1 Miss Offset Node Pid # ..... ....... ....... ....... ....... .................. .... ....... # ------------------------------------------------------------- 0 0 8 32 2 0x7f0830100000 ------------------------------------------------------------- 0.00% 75.00% 21.88% 0.00% 0x18 0 1791 0.00% 12.50% 37.50% 0.00% 0x18 0 1791 0.00% 0.00% 34.38% 0.00% 0x18 0 1791 Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180309101442.9224-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 63 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 45c047fdd7ac..a6336e4e2850 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1252,7 +1252,7 @@ cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, } static struct c2c_dimension dim_dcacheline = { - .header = HEADER_LOW("Cacheline"), + .header = HEADER_SPAN("--- Cacheline ----", "Address", 1), .name = "dcacheline", .cmp = dcacheline_cmp, .entry = dcacheline_entry, @@ -1267,10 +1267,10 @@ static struct c2c_dimension dim_dcacheline_node = { .width = 4, }; -static struct c2c_header header_offset_tui = HEADER_LOW("Off"); +static struct c2c_header header_offset_tui = HEADER_SPAN("-----", "Off", 1); static struct c2c_dimension dim_offset = { - .header = HEADER_BOTH("Data address", "Offset"), + .header = HEADER_SPAN("--- Data address -", "Offset", 1), .name = "offset", .cmp = offset_cmp, .entry = offset_entry, @@ -2453,14 +2453,64 @@ static void perf_c2c_display(struct perf_session *session) } #endif /* HAVE_SLANG_SUPPORT */ -static void ui_quirks(void) +static char *fill_line(const char *orig, int len) { + int i, j, olen = strlen(orig); + char *buf; + + buf = zalloc(len + 1); + if (!buf) + return NULL; + + j = len / 2 - olen / 2; + + for (i = 0; i < j - 1; i++) + buf[i] = '-'; + + buf[i++] = ' '; + + strcpy(buf + i, orig); + + i += olen; + + buf[i++] = ' '; + + for (; i < len; i++) + buf[i] = '-'; + + return buf; +} + +static int ui_quirks(void) +{ + const char *nodestr = "Data address"; + char *buf; + if (!c2c.use_stdio) { dim_offset.width = 5; dim_offset.header = header_offset_tui; + nodestr = "CL"; } dim_percent_hitm.header = percent_hitm_header[c2c.display]; + + /* Fix the zero line for dcacheline column. */ + buf = fill_line("Cacheline", dim_dcacheline.width + + dim_dcacheline_node.width + 2); + if (!buf) + return -ENOMEM; + + dim_dcacheline.header.line[0].text = buf; + + /* Fix the zero line for offset column. */ + buf = fill_line(nodestr, dim_offset.width + + dim_offset_node.width + 2); + if (!buf) + return -ENOMEM; + + dim_offset.header.line[0].text = buf; + + return 0; } #define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" @@ -2760,7 +2810,10 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__finish(); - ui_quirks(); + if (ui_quirks()) { + pr_err("failed to setup UI\n"); + goto out_mem2node; + } perf_c2c_display(session); From 03d9fcb701340de3446b4ff4ddb9f5407d1412f5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Mar 2018 11:14:42 +0100 Subject: [PATCH 11/47] perf c2c report: Add cacheline address count column Adding the 'PA cnt' column grouped under data cacheline address. It shows how many times the physical addresses changed for the hist entry. It does not show the number of different physical addresses for entry, because we don't store those. We only track the number of times we got different address than we currently hold, which is not expensive and gives similar info. $ perf c2c report --stdio # ----------- Cacheline ---------- Total Tot ----- LLC Load Hitm ----- # Index Address Node PA cnt records Hitm Total Lcl Rmt # ..... .................. .... ...... ....... ....... ....... ....... ....... # 0 0xffff9ad56dca0a80 0 9 10 7.69% 2 2 0 1 0xffff9ad56dce0a80 0 9 9 7.69% 2 2 0 2 0xffff9ad37659ad80 0 1 2 3.85% 1 1 0 ... # ----- HITM ----- -- Store Refs -- --------- Data address --------- # Num Rmt Lcl L1 Hit L1 Miss Offset Node PA cnt Pid # ..... ....... ....... ....... ....... .................. .... ...... ....... # ------------------------------------------------------------- 0 0 2 3 0 0xffff9ad56dca0a80 ------------------------------------------------------------- 0.00% 0.00% 33.33% 0.00% 0x0 0 1 2510 0.00% 0.00% 33.33% 0.00% 0x4 0 1 2476 0.00% 0.00% 33.33% 0.00% 0x20 0 1 0 0.00% 100.00% 0.00% 0.00% 0x38 0 1 0 Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180309101442.9224-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a6336e4e2850..2126bfbcb385 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -507,6 +507,17 @@ dcacheline_node_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", width, c2c_he->nodestr); } +static int +dcacheline_node_count(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + + c2c_he = container_of(he, struct c2c_hist_entry, he); + return scnprintf(hpp->buf, hpp->size, "%*lu", width, c2c_he->paddr_cnt); +} + static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) { @@ -1252,7 +1263,7 @@ cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, } static struct c2c_dimension dim_dcacheline = { - .header = HEADER_SPAN("--- Cacheline ----", "Address", 1), + .header = HEADER_SPAN("--- Cacheline ----", "Address", 2), .name = "dcacheline", .cmp = dcacheline_cmp, .entry = dcacheline_entry, @@ -1267,10 +1278,18 @@ static struct c2c_dimension dim_dcacheline_node = { .width = 4, }; -static struct c2c_header header_offset_tui = HEADER_SPAN("-----", "Off", 1); +static struct c2c_dimension dim_dcacheline_count = { + .header = HEADER_LOW("PA cnt"), + .name = "dcacheline_count", + .cmp = empty_cmp, + .entry = dcacheline_node_count, + .width = 6, +}; + +static struct c2c_header header_offset_tui = HEADER_SPAN("-----", "Off", 2); static struct c2c_dimension dim_offset = { - .header = HEADER_SPAN("--- Data address -", "Offset", 1), + .header = HEADER_SPAN("--- Data address -", "Offset", 2), .name = "offset", .cmp = offset_cmp, .entry = offset_entry, @@ -1605,6 +1624,7 @@ static struct c2c_dimension dim_dcacheline_num_empty = { static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_dcacheline_node, + &dim_dcacheline_count, &dim_offset, &dim_offset_node, &dim_iaddr, @@ -2496,7 +2516,8 @@ static int ui_quirks(void) /* Fix the zero line for dcacheline column. */ buf = fill_line("Cacheline", dim_dcacheline.width + - dim_dcacheline_node.width + 2); + dim_dcacheline_node.width + + dim_dcacheline_count.width + 4); if (!buf) return -ENOMEM; @@ -2504,7 +2525,8 @@ static int ui_quirks(void) /* Fix the zero line for offset column. */ buf = fill_line(nodestr, dim_offset.width + - dim_offset_node.width + 2); + dim_offset_node.width + + dim_dcacheline_count.width + 4); if (!buf) return -ENOMEM; @@ -2626,7 +2648,7 @@ static int build_cl_output(char *cl_sort, bool no_source) "percent_lcl_hitm," "percent_stores_l1hit," "percent_stores_l1miss," - "offset,offset_node,", + "offset,offset_node,dcacheline_count,", add_pid ? "pid," : "", add_tid ? "tid," : "", add_iaddr ? "iaddr," : "", @@ -2789,6 +2811,7 @@ static int perf_c2c__report(int argc, const char **argv) "cl_idx," "dcacheline," "dcacheline_node," + "dcacheline_count," "tot_recs," "percent_hitm," "tot_hitm,lcl_hitm,rmt_hitm," From 744e9a91cf898bf027dbe65cc61a5d7565335cba Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 8 Mar 2018 21:10:30 -0600 Subject: [PATCH 12/47] perf tools arm64: Add libdw DWARF post unwind support for ARM64 Based on prior work: https://lkml.org/lkml/2014/5/6/395 and on how other arches add libdw unwind support. Includes support for running the unwind test, e.g., on a system with only elfutils' libdw 0.170, the test now runs, and successfully: $ ./perf test unwind 56: Test dwarf unwind : Ok Originally-by: Jean Pihet Reported-by: Christian Hansen Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180308211030.4ee4a0d6ff6dc5cda1b567d4@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- tools/perf/arch/arm64/include/arch-tests.h | 12 +++++ tools/perf/arch/arm64/tests/Build | 2 + tools/perf/arch/arm64/tests/arch-tests.c | 16 ++++++ tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/unwind-libdw.c | 60 ++++++++++++++++++++++ 6 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/arm64/include/arch-tests.h create mode 100644 tools/perf/arch/arm64/tests/arch-tests.c create mode 100644 tools/perf/arch/arm64/util/unwind-libdw.c diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 89cb2a36b8ff..98ff73648b51 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -75,7 +75,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. -ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc s390)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390)) NO_LIBDW_DWARF_UNWIND := 1 endif diff --git a/tools/perf/arch/arm64/include/arch-tests.h b/tools/perf/arch/arm64/include/arch-tests.h new file mode 100644 index 000000000000..90ec4c8cb880 --- /dev/null +++ b/tools/perf/arch/arm64/include/arch-tests.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +#endif + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build index b30eff9bcc83..883c57ff0c08 100644 --- a/tools/perf/arch/arm64/tests/Build +++ b/tools/perf/arch/arm64/tests/Build @@ -1,2 +1,4 @@ libperf-y += regs_load.o libperf-y += dwarf-unwind.o + +libperf-y += arch-tests.o diff --git a/tools/perf/arch/arm64/tests/arch-tests.c b/tools/perf/arch/arm64/tests/arch-tests.c new file mode 100644 index 000000000000..5b1543c98022 --- /dev/null +++ b/tools/perf/arch/arm64/tests/arch-tests.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "tests/tests.h" +#include "arch-tests.h" + +struct test arch_tests[] = { +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "DWARF unwind", + .func = test__dwarf_unwind, + }, +#endif + { + .func = NULL, + }, +}; diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index c0b8dfef98ba..68f8a8eb3ad0 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -2,6 +2,7 @@ libperf-y += header.o libperf-y += sym-handling.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c new file mode 100644 index 000000000000..7623d85e77f3 --- /dev/null +++ b/tools/perf/arch/arm64/util/unwind-libdw.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "../../util/unwind-libdw.h" +#include "../../util/perf_regs.h" +#include "../../util/event.h" + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) +{ + struct unwind_info *ui = arg; + struct regs_dump *user_regs = &ui->sample->user_regs; + Dwarf_Word dwarf_regs[PERF_REG_ARM64_MAX], dwarf_pc; + +#define REG(r) ({ \ + Dwarf_Word val = 0; \ + perf_reg_value(&val, user_regs, PERF_REG_ARM64_##r); \ + val; \ +}) + + dwarf_regs[0] = REG(X0); + dwarf_regs[1] = REG(X1); + dwarf_regs[2] = REG(X2); + dwarf_regs[3] = REG(X3); + dwarf_regs[4] = REG(X4); + dwarf_regs[5] = REG(X5); + dwarf_regs[6] = REG(X6); + dwarf_regs[7] = REG(X7); + dwarf_regs[8] = REG(X8); + dwarf_regs[9] = REG(X9); + dwarf_regs[10] = REG(X10); + dwarf_regs[11] = REG(X11); + dwarf_regs[12] = REG(X12); + dwarf_regs[13] = REG(X13); + dwarf_regs[14] = REG(X14); + dwarf_regs[15] = REG(X15); + dwarf_regs[16] = REG(X16); + dwarf_regs[17] = REG(X17); + dwarf_regs[18] = REG(X18); + dwarf_regs[19] = REG(X19); + dwarf_regs[20] = REG(X20); + dwarf_regs[21] = REG(X21); + dwarf_regs[22] = REG(X22); + dwarf_regs[23] = REG(X23); + dwarf_regs[24] = REG(X24); + dwarf_regs[25] = REG(X25); + dwarf_regs[26] = REG(X26); + dwarf_regs[27] = REG(X27); + dwarf_regs[28] = REG(X28); + dwarf_regs[29] = REG(X29); + dwarf_regs[30] = REG(LR); + dwarf_regs[31] = REG(SP); + + if (!dwfl_thread_state_registers(thread, 0, PERF_REG_ARM64_MAX, + dwarf_regs)) + return false; + + dwarf_pc = REG(PC); + dwfl_thread_state_register_pc(thread, dwarf_pc); + + return true; +} From 4c0ab16052054946b7b28f8b0ceee57c10d64cc7 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:26 +0800 Subject: [PATCH 13/47] perf vendor events: Drop incomplete multiple mapfile support Currently jevents supports multiple mapfiles, but this is only in the form where mapfile basename starts with 'mapfile.csv' At the moment, no architectures actually use multiple mapfiles, so drop the support for now. This patch also solves a nuisance where, when the mapfile is edited and the text editor may create a backup, jevents may use the backup, as shown: jevents: Many mapfiles? Using pmu-events/arch/arm64/mapfile.csv~, ignoring pmu-events/arch/arm64/mapfile.csv Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/README | 5 ++--- tools/perf/pmu-events/jevents.c | 10 ++-------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README index c2ee3e4417fe..2407abc1d441 100644 --- a/tools/perf/pmu-events/README +++ b/tools/perf/pmu-events/README @@ -11,9 +11,8 @@ tree tools/perf/pmu-events/arch/foo. - Regular files with '.json' extension in the name are assumed to be JSON files, each of which describes a set of PMU events. - - Regular files with basename starting with 'mapfile.csv' are assumed - to be a CSV file that maps a specific CPU to its set of PMU events. - (see below for mapfile format) + - The CSV file that maps a specific CPU to its set of PMU events is to + be named 'mapfile.csv' (see below for mapfile format). - Directories are traversed, but all other files are ignored. diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index b578aa26e375..9e0a21e74a67 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -798,16 +798,10 @@ static int process_one_file(const char *fpath, const struct stat *sb, * after processing all JSON files (so we can write out the * mapping table after all PMU events tables). * - * TODO: Allow for multiple mapfiles? Punt for now. */ if (level == 1 && is_file) { - if (!strncmp(bname, "mapfile.csv", 11)) { - if (mapfile) { - pr_info("%s: Many mapfiles? Using %s, ignoring %s\n", - prog, mapfile, fpath); - } else { - mapfile = strdup(fpath); - } + if (!strcmp(bname, "mapfile.csv")) { + mapfile = strdup(fpath); return 0; } From 931ef5dc5c18717d24e5b8d8a968e35638508051 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:27 +0800 Subject: [PATCH 14/47] perf vendor events: Fix error code in json_events() When EXPECT macro fails an assertion, the error code is not properly set after the first loop of tokens in function json_events(). This is because err is set to the return value from func function pointer call, which must be 0 to continue to loop, yet it is not reset for for each loop. I assume that this was not the intention, so change the code so err is set appropriately in EXPECT macro itself. In addition to this, the indention in EXPECT macro is tidied. The current indention alludes that the 2 statements following the if statement are in the body, which is not true. Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 9e0a21e74a67..edff989fbcea 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -249,9 +249,10 @@ static const char *field_to_perf(struct map *table, char *map, jsmntok_t *val) jsmntok_t *loc = (t); \ if (!(t)->start && (t) > tokens) \ loc = (t) - 1; \ - pr_err("%s:%d: " m ", got %s\n", fn, \ - json_line(map, loc), \ - json_name(t)); \ + pr_err("%s:%d: " m ", got %s\n", fn, \ + json_line(map, loc), \ + json_name(t)); \ + err = -EIO; \ goto out_free; \ } } while (0) @@ -416,7 +417,7 @@ int json_events(const char *fn, char *metric_name, char *metric_group), void *data) { - int err = -EIO; + int err; size_t size; jsmntok_t *tokens, *tok; int i, j, len; From 6f2f2ca3454ec4fa03fcd4507bdd7fe97303065b Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:28 +0800 Subject: [PATCH 15/47] perf vendor events: Drop support for unused topic directories Currently a topic subdirectory is supported in the pmu-events dir, in the following sample structure: /arch/platform/subtopic/mysubtopic.json Upto 256 levels of topic subdirectories are supported. So this means that JSONs may be located in a topic dir as well as the platform dir. This topic subdirectory causes problems if we want to add support for a vendor dir in the pmu-events structure (in the form arch/platform/vendor), in that we cannot differentiate between a vendor dir and a topic dir. Since the topic dir feature is not used, drop it so it does not block adding vendor subdirectory support. Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-4-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 37 +++++++++------------------------ 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index edff989fbcea..1d02fafdc34d 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -256,25 +256,18 @@ static const char *field_to_perf(struct map *table, char *map, jsmntok_t *val) goto out_free; \ } } while (0) -#define TOPIC_DEPTH 256 -static char *topic_array[TOPIC_DEPTH]; -static int topic_level; +static char *topic; static char *get_topic(void) { - char *tp_old, *tp = NULL; + char *tp; int i; - for (i = 0; i < topic_level + 1; i++) { - int n; - - tp_old = tp; - n = asprintf(&tp, "%s%s", tp ?: "", topic_array[i]); - if (n < 0) { - pr_info("%s: asprintf() error %s\n", prog); - return NULL; - } - free(tp_old); + /* tp is free'd in process_one_file() */ + i = asprintf(&tp, "%s", topic); + if (i < 0) { + pr_info("%s: asprintf() error %s\n", prog); + return NULL; } for (i = 0; i < (int) strlen(tp); i++) { @@ -291,25 +284,15 @@ static char *get_topic(void) return tp; } -static int add_topic(int level, char *bname) +static int add_topic(char *bname) { - char *topic; - - level -= 2; - - if (level >= TOPIC_DEPTH) - return -EINVAL; - + free(topic); topic = strdup(bname); if (!topic) { pr_info("%s: strdup() error %s for file %s\n", prog, strerror(errno), bname); return -ENOMEM; } - - free(topic_array[topic_level]); - topic_array[topic_level] = topic; - topic_level = level; return 0; } @@ -824,7 +807,7 @@ static int process_one_file(const char *fpath, const struct stat *sb, } } - if (level > 1 && add_topic(level, bname)) + if (level > 1 && add_topic(bname)) return -ENOMEM; /* From 51ce1dcc5d0d3e40e26893a7fa9e30538960ee7e Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:29 +0800 Subject: [PATCH 16/47] perf vendor events: Add support for pmu events vendor subdirectory For some architectures (like arm), it is required to support a vendor subdirectory and not locate all the JSONs for a specific vendor in the same folder. This is because all the events for the same vendor will be placed in the same pmu events table, which may cause conflict. This conflict would be in the instance that a vendor's custom implemented events do have the same meaning on different platforms, so events in the pmu table would conflict. In addition, per list command may show events which are not even supported for a given platform. This patch adds support for a arch/vendor/platform directory hierarchy, while maintaining backwards-compatibility for existing arch/platform structure. In this, each platform would always have its own pmu events table. In generated file pmu_events.c, each platform table name is in the format pme{_vendor}_platform, like this: struct pmu_events_map pmu_events_map[] = { { .cpuid = "0x00000000420f5160", .version = "v1", .type = "core", .table = pme_cavium_thunderx2 }, { .cpuid = 0, .version = 0, .type = 0, .table = 0, }, }; Signed-off-by: John Garry Acked-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Sukadev Bhattiprolu Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-5-git-send-email-john.garry@huawei.com Link: http://lkml.kernel.org/r/1521047452-28565-1-git-send-email-john.garry@huawei.com [ Add missing limits.h include, fixing the build on at least all Alpine Linux versions tested (3.4 to 3.7 + edge), ] [ Applied a patch to fix reading ./.. directories in XFS, see second Link tag ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/README | 4 ++ tools/perf/pmu-events/jevents.c | 68 ++++++++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README index 2407abc1d441..655286ff8767 100644 --- a/tools/perf/pmu-events/README +++ b/tools/perf/pmu-events/README @@ -28,6 +28,10 @@ sub directory. Thus for the Silvermont X86 CPU: Cache.json Memory.json Virtual-Memory.json Frontend.json Pipeline.json +The JSONs folder for a CPU model/family may be placed in the root arch +folder, or may be placed in a vendor sub-folder under the arch folder +for instances where the arch and vendor are not the same. + Using the JSON files and the mapfile, 'jevents' generates the C source file, 'pmu-events.c', which encodes the two sets of tables: diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 1d02fafdc34d..0981d313064f 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include /* getrlimit */ #include /* getrlimit */ @@ -572,7 +573,7 @@ static char *file_name_to_table_name(char *fname) * Derive rest of table name from basename of the JSON file, * replacing hyphens and stripping out .json suffix. */ - n = asprintf(&tblname, "pme_%s", basename(fname)); + n = asprintf(&tblname, "pme_%s", fname); if (n < 0) { pr_info("%s: asprintf() error %s for file %s\n", prog, strerror(errno), fname); @@ -582,7 +583,7 @@ static char *file_name_to_table_name(char *fname) for (i = 0; i < strlen(tblname); i++) { c = tblname[i]; - if (c == '-') + if (c == '-' || c == '/') tblname[i] = '_'; else if (c == '.') { tblname[i] = '\0'; @@ -739,25 +740,80 @@ static int get_maxfds(void) static FILE *eventsfp; static char *mapfile; +static int is_leaf_dir(const char *fpath) +{ + DIR *d; + struct dirent *dir; + int res = 1; + + d = opendir(fpath); + if (!d) + return 0; + + while ((dir = readdir(d)) != NULL) { + if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) + continue; + + if (dir->d_type == DT_DIR) { + res = 0; + break; + } else if (dir->d_type == DT_UNKNOWN) { + char path[PATH_MAX]; + struct stat st; + + sprintf(path, "%s/%s", fpath, dir->d_name); + if (stat(path, &st)) + break; + + if (S_ISDIR(st.st_mode)) { + res = 0; + break; + } + } + } + + closedir(d); + + return res; +} + static int process_one_file(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { - char *tblname, *bname = (char *) fpath + ftwbuf->base; + char *tblname, *bname; int is_dir = typeflag == FTW_D; int is_file = typeflag == FTW_F; int level = ftwbuf->level; int err = 0; + if (level == 2 && is_dir) { + /* + * For level 2 directory, bname will include parent name, + * like vendor/platform. So search back from platform dir + * to find this. + */ + bname = (char *) fpath + ftwbuf->base - 2; + for (;;) { + if (*bname == '/') + break; + bname--; + } + bname++; + } else + bname = (char *) fpath + ftwbuf->base; + pr_debug("%s %d %7jd %-20s %s\n", is_file ? "f" : is_dir ? "d" : "x", level, sb->st_size, bname, fpath); - /* base dir */ - if (level == 0) + /* base dir or too deep */ + if (level == 0 || level > 3) return 0; + /* model directory, reset topic */ - if (level == 1 && is_dir) { + if ((level == 1 && is_dir && is_leaf_dir(fpath)) || + (level == 2 && is_dir)) { if (close_table) print_events_table_suffix(eventsfp); From e3b9f1e81de2083f359bacd2a94bf1c024f2ede0 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:30 +0800 Subject: [PATCH 17/47] perf vendor events arm64: Relocate ThunderX2 JSON to cavium subdirectory Since jevents now supports vendor subdirectory, relocate the ThunderX2 JSON to Cavium subdirectory. Signed-off-by: John Garry Tested-by: Ganapatrao Kulkarni Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-6-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../{thunderx2-imp-def.json => thunderx2/core-imp-def.json} | 0 tools/perf/pmu-events/arch/arm64/mapfile.csv | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tools/perf/pmu-events/arch/arm64/cavium/{thunderx2-imp-def.json => thunderx2/core-imp-def.json} (100%) diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json rename to tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index e61c9ca6cf9e..952a05cbf675 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -12,5 +12,5 @@ # # #Family-model,Version,Filename,EventType -0x00000000420f5160,v1,cavium,core +0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000410fd03[[:xdigit:]],v1,cortex-a53,core From 82e6fdd6c01257d64009defbbea19d12ba667670 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:31 +0800 Subject: [PATCH 18/47] perf vendor events arm64: Relocate Cortex A53 JSONs to arm subdirectory Since jevents now supports vendor subdirectory, relocate the Cortex-A53 JSONs to arm subdirectory. Signed-off-by: John Garry Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-7-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/branch.json | 0 tools/perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/bus.json | 0 .../perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/cache.json | 0 .../perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/memory.json | 0 .../perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/other.json | 0 .../pmu-events/arch/arm64/{ => arm}/cortex-a53/pipeline.json | 0 tools/perf/pmu-events/arch/arm64/mapfile.csv | 2 +- 7 files changed, 1 insertion(+), 1 deletion(-) rename tools/perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/branch.json (100%) rename tools/perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/bus.json (100%) rename tools/perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/cache.json (100%) rename tools/perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/memory.json (100%) rename tools/perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/other.json (100%) rename tools/perf/pmu-events/arch/arm64/{ => arm}/cortex-a53/pipeline.json (100%) diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json rename to tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json rename to tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json rename to tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json rename to tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/cortex-a53/other.json rename to tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json diff --git a/tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json similarity index 100% rename from tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json rename to tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 952a05cbf675..cf14e23b6404 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -12,5 +12,5 @@ # # #Family-model,Version,Filename,EventType +0x00000000410fd03[[:xdigit:]],v1,arm/cortex-a53,core 0x00000000420f5160,v1,cavium/thunderx2,core -0x00000000410fd03[[:xdigit:]],v1,cortex-a53,core From e9d32c1bf0cd7a98358ec4aa1625bf2b3459b9ac Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:32 +0800 Subject: [PATCH 19/47] perf vendor events: Add support for arch standard events For some architectures (like arm), there are architecture- defined events. Sometimes these events may be "recommended" according to the architecture standard, in that the implementer is free ignore the "recommendation" and create its custom event. This patch adds support for parsing standard events from arch-defined JSONs, and fixing up vendor events when they have implemented these events as standard. Support is also ensured that the vendor may implement their own custom events. A new step is added to the pmu events parsing to fix up the vendor events with the arch-standard events. The arch-defined JSONs must be placed in the arch root folder for preprocessing prior to tree JSON processing. In the vendor JSON, to specify that the arch event is supported, the keyword "ArchStdEvent" should be used, like this: [ { "ArchStdEvent": "L1D_CACHE_WR", }, ] Matching is based on the "EventName" field in the architecture JSON. No other JSON objects are strictly required. However, for other objects added, these take precedence over architecture defined standard events, thus supporting separate events which have the same event code. Signed-off-by: John Garry Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-8-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/Build | 2 + tools/perf/pmu-events/README | 6 ++ tools/perf/pmu-events/jevents.c | 167 +++++++++++++++++++++++++++++++- 3 files changed, 172 insertions(+), 3 deletions(-) diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 999a4e878162..17783913d330 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -1,10 +1,12 @@ hostprogs := jevents jevents-y += json.o jsmn.o jevents.o +CHOSTFLAGS_jevents.o = -I$(srctree)/tools/include pmu-events-y += pmu-events.o JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ find $(JDIR) -name '*.json' -o -name 'mapfile.csv') + # # Locate/process JSON files in pmu-events/arch/ # directory and create tables in pmu-events.c. diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README index 655286ff8767..e62b09b6a844 100644 --- a/tools/perf/pmu-events/README +++ b/tools/perf/pmu-events/README @@ -16,6 +16,12 @@ tree tools/perf/pmu-events/arch/foo. - Directories are traversed, but all other files are ignored. + - To reduce JSON event duplication per architecture, platform JSONs may + use "ArchStdEvent" keyword to dereference an "Architecture standard + events", defined in architecture standard JSONs. + Architecture standard JSONs must be located in the architecture root + folder. Matching is based on the "EventName" field. + The PMU events supported by a CPU model are expected to grouped into topics such as Pipelining, Cache, Memory, Floating-point etc. All events for a topic should be placed in a separate JSON file - where the file name identifies diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 0981d313064f..db3a594ee1e4 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -45,6 +45,7 @@ #include /* getrlimit */ #include #include +#include #include "jsmn.h" #include "json.h" #include "jevents.h" @@ -351,6 +352,81 @@ static int print_events_table_entry(void *data, char *name, char *event, return 0; } +struct event_struct { + struct list_head list; + char *name; + char *event; + char *desc; + char *long_desc; + char *pmu; + char *unit; + char *perpkg; + char *metric_expr; + char *metric_name; + char *metric_group; +}; + +#define ADD_EVENT_FIELD(field) do { if (field) { \ + es->field = strdup(field); \ + if (!es->field) \ + goto out_free; \ +} } while (0) + +#define FREE_EVENT_FIELD(field) free(es->field) + +#define TRY_FIXUP_FIELD(field) do { if (es->field && !*field) {\ + *field = strdup(es->field); \ + if (!*field) \ + return -ENOMEM; \ +} } while (0) + +#define FOR_ALL_EVENT_STRUCT_FIELDS(op) do { \ + op(name); \ + op(event); \ + op(desc); \ + op(long_desc); \ + op(pmu); \ + op(unit); \ + op(perpkg); \ + op(metric_expr); \ + op(metric_name); \ + op(metric_group); \ +} while (0) + +static LIST_HEAD(arch_std_events); + +static void free_arch_std_events(void) +{ + struct event_struct *es, *next; + + list_for_each_entry_safe(es, next, &arch_std_events, list) { + FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD); + list_del(&es->list); + free(es); + } +} + +static int save_arch_std_events(void *data, char *name, char *event, + char *desc, char *long_desc, char *pmu, + char *unit, char *perpkg, char *metric_expr, + char *metric_name, char *metric_group) +{ + struct event_struct *es; + struct stat *sb = data; + + es = malloc(sizeof(*es)); + if (!es) + return -ENOMEM; + memset(es, 0, sizeof(*es)); + FOR_ALL_EVENT_STRUCT_FIELDS(ADD_EVENT_FIELD); + list_add_tail(&es->list, &arch_std_events); + return 0; +out_free: + FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD); + free(es); + return -ENOMEM; +} + static void print_events_table_suffix(FILE *outfp) { fprintf(outfp, "{\n"); @@ -392,6 +468,32 @@ static char *real_event(const char *name, char *event) return event; } +static int +try_fixup(const char *fn, char *arch_std, char **event, char **desc, + char **name, char **long_desc, char **pmu, char **filter, + char **perpkg, char **unit, char **metric_expr, char **metric_name, + char **metric_group, unsigned long long eventcode) +{ + /* try to find matching event from arch standard values */ + struct event_struct *es; + + list_for_each_entry(es, &arch_std_events, list) { + if (!strcmp(arch_std, es->name)) { + if (!eventcode && es->event) { + /* allow EventCode to be overridden */ + free(*event); + *event = NULL; + } + FOR_ALL_EVENT_STRUCT_FIELDS(TRY_FIXUP_FIELD); + return 0; + } + } + + pr_err("%s: could not find matching %s for %s\n", + prog, arch_std, fn); + return -1; +} + /* Call func with each event in the json file */ int json_events(const char *fn, int (*func)(void *data, char *name, char *event, char *desc, @@ -427,6 +529,7 @@ int json_events(const char *fn, char *metric_expr = NULL; char *metric_name = NULL; char *metric_group = NULL; + char *arch_std = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; @@ -512,6 +615,10 @@ int json_events(const char *fn, addfield(map, &metric_expr, "", "", val); for (s = metric_expr; *s; s++) *s = tolower(*s); + } else if (json_streq(map, field, "ArchStdEvent")) { + addfield(map, &arch_std, "", "", val); + for (s = arch_std; *s; s++) + *s = tolower(*s); } /* ignore unknown fields */ } @@ -536,8 +643,21 @@ int json_events(const char *fn, if (name) fixname(name); + if (arch_std) { + /* + * An arch standard event is referenced, so try to + * fixup any unassigned values. + */ + err = try_fixup(fn, arch_std, &event, &desc, &name, + &long_desc, &pmu, &filter, &perpkg, + &unit, &metric_expr, &metric_name, + &metric_group, eventcode); + if (err) + goto free_strings; + } err = func(data, name, real_event(name, event), desc, long_desc, pmu, unit, perpkg, metric_expr, metric_name, metric_group); +free_strings: free(event); free(desc); free(name); @@ -550,6 +670,8 @@ int json_events(const char *fn, free(metric_expr); free(metric_name); free(metric_group); + free(arch_std); + if (err) break; tok += j; @@ -777,6 +899,32 @@ static int is_leaf_dir(const char *fpath) return res; } +static int is_json_file(const char *name) +{ + const char *suffix; + + if (strlen(name) < 5) + return 0; + + suffix = name + strlen(name) - 5; + + if (strncmp(suffix, ".json", 5) == 0) + return 1; + return 0; +} + +static int preprocess_arch_std_files(const char *fpath, const struct stat *sb, + int typeflag, struct FTW *ftwbuf) +{ + int level = ftwbuf->level; + int is_file = typeflag == FTW_F; + + if (level == 1 && is_file && is_json_file(fpath)) + return json_events(fpath, save_arch_std_events, (void *)sb); + + return 0; +} + static int process_one_file(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { @@ -854,9 +1002,7 @@ static int process_one_file(const char *fpath, const struct stat *sb, * ignore it. It could be a readme.txt for instance. */ if (is_file) { - char *suffix = bname + strlen(bname) - 5; - - if (strncmp(suffix, ".json", 5)) { + if (!is_json_file(bname)) { pr_info("%s: Ignoring file without .json suffix %s\n", prog, fpath); return 0; @@ -962,12 +1108,26 @@ int main(int argc, char *argv[]) maxfds = get_maxfds(); mapfile = NULL; + rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0); + if (rc && verbose) { + pr_info("%s: Error preprocessing arch standard files %s\n", + prog, ldirname); + goto empty_map; + } else if (rc < 0) { + /* Make build fail */ + free_arch_std_events(); + return 1; + } else if (rc) { + goto empty_map; + } + rc = nftw(ldirname, process_one_file, maxfds, 0); if (rc && verbose) { pr_info("%s: Error walking file tree %s\n", prog, ldirname); goto empty_map; } else if (rc < 0) { /* Make build fail */ + free_arch_std_events(); return 1; } else if (rc) { goto empty_map; @@ -992,5 +1152,6 @@ int main(int argc, char *argv[]) empty_map: fclose(eventsfp); create_empty_mapping(output_file); + free_arch_std_events(); return 0; } From 360b7b03afee042d71ab54ba6ea55daf53edf538 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:33 +0800 Subject: [PATCH 20/47] perf vendor events arm64: Add armv8-recommended.json Add JSON for ARMv8 IMPLEMENTATION DEFINED recommended events. The JSON is copied from ARMv8 architecture reference manual, available here: https://static.docs.arm.com/ddi0487/ca/DDI0487C_a_armv8_arm.pdf Originally-from: Shaokun Zhang Signed-off-by: John Garry Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-9-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/armv8-recommended.json | 452 ++++++++++++++++++ 1 file changed, 452 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/armv8-recommended.json diff --git a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json b/tools/perf/pmu-events/arch/arm64/armv8-recommended.json new file mode 100644 index 000000000000..6328828c018c --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/armv8-recommended.json @@ -0,0 +1,452 @@ +[ + { + "PublicDescription": "Attributable Level 1 data cache access, read", + "EventCode": "0x40", + "EventName": "L1D_CACHE_RD", + "BriefDescription": "L1D cache access, read" + }, + { + "PublicDescription": "Attributable Level 1 data cache access, write", + "EventCode": "0x41", + "EventName": "L1D_CACHE_WR", + "BriefDescription": "L1D cache access, write" + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, read", + "EventCode": "0x42", + "EventName": "L1D_CACHE_REFILL_RD", + "BriefDescription": "L1D cache refill, read" + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, write", + "EventCode": "0x43", + "EventName": "L1D_CACHE_REFILL_WR", + "BriefDescription": "L1D cache refill, write" + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, inner", + "EventCode": "0x44", + "EventName": "L1D_CACHE_REFILL_INNER", + "BriefDescription": "L1D cache refill, inner" + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, outer", + "EventCode": "0x45", + "EventName": "L1D_CACHE_REFILL_OUTER", + "BriefDescription": "L1D cache refill, outer" + }, + { + "PublicDescription": "Attributable Level 1 data cache Write-Back, victim", + "EventCode": "0x46", + "EventName": "L1D_CACHE_WB_VICTIM", + "BriefDescription": "L1D cache Write-Back, victim" + }, + { + "PublicDescription": "Level 1 data cache Write-Back, cleaning and coherency", + "EventCode": "0x47", + "EventName": "L1D_CACHE_WB_CLEAN", + "BriefDescription": "L1D cache Write-Back, cleaning and coherency" + }, + { + "PublicDescription": "Attributable Level 1 data cache invalidate", + "EventCode": "0x48", + "EventName": "L1D_CACHE_INVAL", + "BriefDescription": "L1D cache invalidate" + }, + { + "PublicDescription": "Attributable Level 1 data TLB refill, read", + "EventCode": "0x4C", + "EventName": "L1D_TLB_REFILL_RD", + "BriefDescription": "L1D tlb refill, read" + }, + { + "PublicDescription": "Attributable Level 1 data TLB refill, write", + "EventCode": "0x4D", + "EventName": "L1D_TLB_REFILL_WR", + "BriefDescription": "L1D tlb refill, write" + }, + { + "PublicDescription": "Attributable Level 1 data or unified TLB access, read", + "EventCode": "0x4E", + "EventName": "L1D_TLB_RD", + "BriefDescription": "L1D tlb access, read" + }, + { + "PublicDescription": "Attributable Level 1 data or unified TLB access, write", + "EventCode": "0x4F", + "EventName": "L1D_TLB_WR", + "BriefDescription": "L1D tlb access, write" + }, + { + "PublicDescription": "Attributable Level 2 data cache access, read", + "EventCode": "0x50", + "EventName": "L2D_CACHE_RD", + "BriefDescription": "L2D cache access, read" + }, + { + "PublicDescription": "Attributable Level 2 data cache access, write", + "EventCode": "0x51", + "EventName": "L2D_CACHE_WR", + "BriefDescription": "L2D cache access, write" + }, + { + "PublicDescription": "Attributable Level 2 data cache refill, read", + "EventCode": "0x52", + "EventName": "L2D_CACHE_REFILL_RD", + "BriefDescription": "L2D cache refill, read" + }, + { + "PublicDescription": "Attributable Level 2 data cache refill, write", + "EventCode": "0x53", + "EventName": "L2D_CACHE_REFILL_WR", + "BriefDescription": "L2D cache refill, write" + }, + { + "PublicDescription": "Attributable Level 2 data cache Write-Back, victim", + "EventCode": "0x56", + "EventName": "L2D_CACHE_WB_VICTIM", + "BriefDescription": "L2D cache Write-Back, victim" + }, + { + "PublicDescription": "Level 2 data cache Write-Back, cleaning and coherency", + "EventCode": "0x57", + "EventName": "L2D_CACHE_WB_CLEAN", + "BriefDescription": "L2D cache Write-Back, cleaning and coherency" + }, + { + "PublicDescription": "Attributable Level 2 data cache invalidate", + "EventCode": "0x58", + "EventName": "L2D_CACHE_INVAL", + "BriefDescription": "L2D cache invalidate" + }, + { + "PublicDescription": "Attributable Level 2 data or unified TLB refill, read", + "EventCode": "0x5c", + "EventName": "L2D_TLB_REFILL_RD", + "BriefDescription": "L2D cache refill, read" + }, + { + "PublicDescription": "Attributable Level 2 data or unified TLB refill, write", + "EventCode": "0x5d", + "EventName": "L2D_TLB_REFILL_WR", + "BriefDescription": "L2D cache refill, write" + }, + { + "PublicDescription": "Attributable Level 2 data or unified TLB access, read", + "EventCode": "0x5e", + "EventName": "L2D_TLB_RD", + "BriefDescription": "L2D cache access, read" + }, + { + "PublicDescription": "Attributable Level 2 data or unified TLB access, write", + "EventCode": "0x5f", + "EventName": "L2D_TLB_WR", + "BriefDescription": "L2D cache access, write" + }, + { + "PublicDescription": "Bus access read", + "EventCode": "0x60", + "EventName": "BUS_ACCESS_RD", + "BriefDescription": "Bus access read" + }, + { + "PublicDescription": "Bus access write", + "EventCode": "0x61", + "EventName": "BUS_ACCESS_WR", + "BriefDescription": "Bus access write" + } + { + "PublicDescription": "Bus access, Normal, Cacheable, Shareable", + "EventCode": "0x62", + "EventName": "BUS_ACCESS_SHARED", + "BriefDescription": "Bus access, Normal, Cacheable, Shareable" + } + { + "PublicDescription": "Bus access, not Normal, Cacheable, Shareable", + "EventCode": "0x63", + "EventName": "BUS_ACCESS_NOT_SHARED", + "BriefDescription": "Bus access, not Normal, Cacheable, Shareable" + } + { + "PublicDescription": "Bus access, Normal", + "EventCode": "0x64", + "EventName": "BUS_ACCESS_NORMAL", + "BriefDescription": "Bus access, Normal" + } + { + "PublicDescription": "Bus access, peripheral", + "EventCode": "0x65", + "EventName": "BUS_ACCESS_PERIPH", + "BriefDescription": "Bus access, peripheral" + } + { + "PublicDescription": "Data memory access, read", + "EventCode": "0x66", + "EventName": "MEM_ACCESS_RD", + "BriefDescription": "Data memory access, read" + } + { + "PublicDescription": "Data memory access, write", + "EventCode": "0x67", + "EventName": "MEM_ACCESS_WR", + "BriefDescription": "Data memory access, write" + } + { + "PublicDescription": "Unaligned access, read", + "EventCode": "0x68", + "EventName": "UNALIGNED_LD_SPEC", + "BriefDescription": "Unaligned access, read" + } + { + "PublicDescription": "Unaligned access, write", + "EventCode": "0x69", + "EventName": "UNALIGNED_ST_SPEC", + "BriefDescription": "Unaligned access, write" + } + { + "PublicDescription": "Unaligned access", + "EventCode": "0x6a", + "EventName": "UNALIGNED_LDST_SPEC", + "BriefDescription": "Unaligned access" + } + { + "PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX", + "EventCode": "0x6c", + "EventName": "LDREX_SPEC", + "BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX" + } + { + "PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass", + "EventCode": "0x6d", + "EventName": "STREX_PASS_SPEC", + "BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass" + } + { + "PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail", + "EventCode": "0x6e", + "EventName": "STREX_FAIL_SPEC", + "BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail" + } + { + "PublicDescription": "Exclusive operation speculatively executed, STREX or STX", + "EventCode": "0x6f", + "EventName": "STREX_SPEC", + "BriefDescription": "Exclusive operation speculatively executed, STREX or STX" + } + { + "PublicDescription": "Operation speculatively executed, load", + "EventCode": "0x70", + "EventName": "LD_SPEC", + "BriefDescription": "Operation speculatively executed, load" + } + { + "PublicDescription": "Operation speculatively executed, store" + "EventCode": "0x71", + "EventName": "ST_SPEC", + "BriefDescription": "Operation speculatively executed, store" + } + { + "PublicDescription": "Operation speculatively executed, load or store", + "EventCode": "0x72", + "EventName": "LDST_SPEC", + "BriefDescription": "Operation speculatively executed, load or store" + } + { + "PublicDescription": "Operation speculatively executed, integer data processing", + "EventCode": "0x73", + "EventName": "DP_SPEC", + "BriefDescription": "Operation speculatively executed, integer data processing" + } + { + "PublicDescription": "Operation speculatively executed, Advanced SIMD instruction", + "EventCode": "0x74", + "EventName": "ASE_SPEC", + "BriefDescription": "Operation speculatively executed, Advanced SIMD instruction", + } + { + "PublicDescription": "Operation speculatively executed, floating-point instruction", + "EventCode": "0x75", + "EventName": "VFP_SPEC", + "BriefDescription": "Operation speculatively executed, floating-point instruction" + } + { + "PublicDescription": "Operation speculatively executed, software change of the PC", + "EventCode": "0x76", + "EventName": "PC_WRITE_SPEC", + "BriefDescription": "Operation speculatively executed, software change of the PC" + } + { + "PublicDescription": "Operation speculatively executed, Cryptographic instruction", + "EventCode": "0x77", + "EventName": "CRYPTO_SPEC", + "BriefDescription": "Operation speculatively executed, Cryptographic instruction" + } + { + "PublicDescription": "Branch speculatively executed, immediate branch" + "EventCode": "0x78", + "EventName": "BR_IMMED_SPEC", + "BriefDescription": "Branch speculatively executed, immediate branch" + } + { + "PublicDescription": "Branch speculatively executed, procedure return" + "EventCode": "0x79", + "EventName": "BR_RETURN_SPEC", + "BriefDescription": "Branch speculatively executed, procedure return" + } + { + "PublicDescription": "Branch speculatively executed, indirect branch" + "EventCode": "0x7a", + "EventName": "BR_INDIRECT_SPEC", + "BriefDescription": "Branch speculatively executed, indirect branch" + } + { + "PublicDescription": "Barrier speculatively executed, ISB" + "EventCode": "0x7c", + "EventName": "ISB_SPEC", + "BriefDescription": "Barrier speculatively executed, ISB" + } + { + "PublicDescription": "Barrier speculatively executed, DSB" + "EventCode": "0x7d", + "EventName": "DSB_SPEC", + "BriefDescription": "Barrier speculatively executed, DSB" + } + { + "PublicDescription": "Barrier speculatively executed, DMB" + "EventCode": "0x7e", + "EventName": "DMB_SPEC", + "BriefDescription": "Barrier speculatively executed, DMB" + } + { + "PublicDescription": "Exception taken, Other synchronous" + "EventCode": "0x81", + "EventName": "EXC_UNDEF", + "BriefDescription": "Exception taken, Other synchronous" + } + { + "PublicDescription": "Exception taken, Supervisor Call" + "EventCode": "0x82", + "EventName": "EXC_SVC", + "BriefDescription": "Exception taken, Supervisor Call" + } + { + "PublicDescription": "Exception taken, Instruction Abort" + "EventCode": "0x83", + "EventName": "EXC_PABORT", + "BriefDescription": "Exception taken, Instruction Abort" + } + { + "PublicDescription": "Exception taken, Data Abort and SError" + "EventCode": "0x84", + "EventName": "EXC_DABORT", + "BriefDescription": "Exception taken, Data Abort and SError" + } + { + "PublicDescription": "Exception taken, IRQ" + "EventCode": "0x86", + "EventName": "EXC_IRQ", + "BriefDescription": "Exception taken, IRQ" + } + { + "PublicDescription": "Exception taken, FIQ" + "EventCode": "0x87", + "EventName": "EXC_FIQ", + "BriefDescription": "Exception taken, FIQ" + } + { + "PublicDescription": "Exception taken, Secure Monitor Call" + "EventCode": "0x88", + "EventName": "EXC_SMC", + "BriefDescription": "Exception taken, Secure Monitor Call" + } + { + "PublicDescription": "Exception taken, Hypervisor Call" + "EventCode": "0x8a", + "EventName": "EXC_HVC", + "BriefDescription": "Exception taken, Hypervisor Call" + } + { + "PublicDescription": "Exception taken, Instruction Abort not taken locally" + "EventCode": "0x8b", + "EventName": "EXC_TRAP_PABORT", + "BriefDescription": "Exception taken, Instruction Abort not taken locally" + } + { + "PublicDescription": "Exception taken, Data Abort or SError not taken locally" + "EventCode": "0x8c", + "EventName": "EXC_TRAP_DABORT", + "BriefDescription": "Exception taken, Data Abort or SError not taken locally" + } + { + "PublicDescription": "Exception taken, Other traps not taken locally" + "EventCode": "0x8d", + "EventName": "EXC_TRAP_OTHER", + "BriefDescription": "Exception taken, Other traps not taken locally" + } + { + "PublicDescription": "Exception taken, IRQ not taken locally" + "EventCode": "0x8e", + "EventName": "EXC_TRAP_IRQ", + "BriefDescription": "Exception taken, IRQ not taken locally" + } + { + "PublicDescription": "Exception taken, FIQ not taken locally" + "EventCode": "0x8f", + "EventName": "EXC_TRAP_FIQ", + "BriefDescription": "Exception taken, FIQ not taken locally" + } + { + "PublicDescription": "Release consistency operation speculatively executed, Load-Acquire" + "EventCode": "0x90", + "EventName": "RC_LD_SPEC", + "BriefDescription": "Release consistency operation speculatively executed, Load-Acquire" + } + { + "PublicDescription": "Release consistency operation speculatively executed, Store-Release" + "EventCode": "0x91", + "EventName": "RC_ST_SPEC", + "BriefDescription": "Release consistency operation speculatively executed, Store-Release" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache access, read" + "EventCode": "0xa0", + "EventName": "L3D_CACHE_RD", + "BriefDescription": "Attributable Level 3 data or unified cache access, read" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache access, write" + "EventCode": "0xa1", + "EventName": "L3D_CACHE_WR", + "BriefDescription": "Attributable Level 3 data or unified cache access, write" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache refill, read" + "EventCode": "0xa2", + "EventName": "L3D_CACHE_REFILL_RD", + "BriefDescription": "Attributable Level 3 data or unified cache refill, read" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache refill, write" + "EventCode": "0xa3", + "EventName": "L3D_CACHE_REFILL_WR", + "BriefDescription": "Attributable Level 3 data or unified cache refill, write" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim" + "EventCode": "0xa6", + "EventName": "L3D_CACHE_WB_VICTIM", + "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean" + "EventCode": "0xa7", + "EventName": "L3D_CACHE_WB_CLEAN", + "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean" + } + { + "PublicDescription": "Attributable Level 3 data or unified cache access, invalidate" + "EventCode": "0xa8", + "EventName": "L3D_CACHE_INVAL", + "BriefDescription": "Attributable Level 3 data or unified cache access, invalidate" + } +] From ae43053bd2595dc98f0909505dc1d7e1ed8bd239 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:34 +0800 Subject: [PATCH 21/47] perf vendor events arm64: Fixup ThunderX2 to use recommended events This patch fixes the Cavium ThunderX2 JSON to use event definitions from the ARMv8 recommended events. Signed-off-by: John Garry Tested-by: Ganapatrao Kulkarni Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-10-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/cavium/thunderx2/core-imp-def.json | 50 ++++--------------- 1 file changed, 10 insertions(+), 40 deletions(-) diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json index 2db45c40ebc7..bc03c06c3918 100644 --- a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json +++ b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json @@ -1,62 +1,32 @@ [ { - "PublicDescription": "Attributable Level 1 data cache access, read", - "EventCode": "0x40", - "EventName": "l1d_cache_rd", - "BriefDescription": "L1D cache read", + "ArchStdEvent": "L1D_CACHE_RD", }, { - "PublicDescription": "Attributable Level 1 data cache access, write ", - "EventCode": "0x41", - "EventName": "l1d_cache_wr", - "BriefDescription": "L1D cache write", + "ArchStdEvent": "L1D_CACHE_WR", }, { - "PublicDescription": "Attributable Level 1 data cache refill, read", - "EventCode": "0x42", - "EventName": "l1d_cache_refill_rd", - "BriefDescription": "L1D cache refill read", + "ArchStdEvent": "L1D_CACHE_REFILL_RD", }, { - "PublicDescription": "Attributable Level 1 data cache refill, write", - "EventCode": "0x43", - "EventName": "l1d_cache_refill_wr", - "BriefDescription": "L1D refill write", + "ArchStdEvent": "L1D_CACHE_REFILL_WR", }, { - "PublicDescription": "Attributable Level 1 data TLB refill, read", - "EventCode": "0x4C", - "EventName": "l1d_tlb_refill_rd", - "BriefDescription": "L1D tlb refill read", + "ArchStdEvent": "L1D_TLB_REFILL_RD", }, { - "PublicDescription": "Attributable Level 1 data TLB refill, write", - "EventCode": "0x4D", - "EventName": "l1d_tlb_refill_wr", - "BriefDescription": "L1D tlb refill write", + "ArchStdEvent": "L1D_TLB_REFILL_WR", }, { - "PublicDescription": "Attributable Level 1 data or unified TLB access, read", - "EventCode": "0x4E", - "EventName": "l1d_tlb_rd", - "BriefDescription": "L1D tlb read", + "ArchStdEvent": "L1D_TLB_RD", }, { - "PublicDescription": "Attributable Level 1 data or unified TLB access, write", - "EventCode": "0x4F", - "EventName": "l1d_tlb_wr", - "BriefDescription": "L1D tlb write", + "ArchStdEvent": "L1D_TLB_WR", }, { - "PublicDescription": "Bus access read", - "EventCode": "0x60", - "EventName": "bus_access_rd", - "BriefDescription": "Bus access read", + "ArchStdEvent": "BUS_ACCESS_RD", }, { - "PublicDescription": "Bus access write", - "EventCode": "0x61", - "EventName": "bus_access_wr", - "BriefDescription": "Bus access write", + "ArchStdEvent": "BUS_ACCESS_WR", } ] From afe4d089621d4d90ac0e089b83752ea4515325ac Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:35 +0800 Subject: [PATCH 22/47] perf vendor events arm64: fixup A53 to use recommended events This patch fixes the ARM Cortex-A53 json to use event definition from the ARMv8 recommended events. In addition to this change, other changes were made: - remove stray ',' - remove mirrored events in memory.json and bus.json - fixed indentation to be consistent with other ARM JSONs Signed-off-by: John Garry Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-11-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/arm/cortex-a53/branch.json | 14 +++--- .../arch/arm64/arm/cortex-a53/bus.json | 22 ++-------- .../arch/arm64/arm/cortex-a53/cache.json | 40 ++++++++--------- .../arch/arm64/arm/cortex-a53/memory.json | 14 +----- .../arch/arm64/arm/cortex-a53/other.json | 44 +++++++++---------- .../arch/arm64/arm/cortex-a53/pipeline.json | 20 ++++----- 6 files changed, 62 insertions(+), 92 deletions(-) diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json index 3b6208763e50..0b0e6b26605b 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json @@ -1,25 +1,23 @@ [ - {, - "EventCode": "0x7A", - "EventName": "BR_INDIRECT_SPEC", - "BriefDescription": "Branch speculatively executed - Indirect branch" + { + "ArchStdEvent": "BR_INDIRECT_SPEC", }, - {, + { "EventCode": "0xC9", "EventName": "BR_COND", "BriefDescription": "Conditional branch executed" }, - {, + { "EventCode": "0xCA", "EventName": "BR_INDIRECT_MISPRED", "BriefDescription": "Indirect branch mispredicted" }, - {, + { "EventCode": "0xCB", "EventName": "BR_INDIRECT_MISPRED_ADDR", "BriefDescription": "Indirect branch mispredicted because of address miscompare" }, - {, + { "EventCode": "0xCC", "EventName": "BR_COND_MISPRED", "BriefDescription": "Conditional branch mispredicted" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json index 480d9f7460ab..ce33b2553277 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json @@ -1,22 +1,8 @@ [ - {, - "EventCode": "0x60", - "EventName": "BUS_ACCESS_LD", - "BriefDescription": "Bus access - Read" + { + "ArchStdEvent": "BUS_ACCESS_RD", }, - {, - "EventCode": "0x61", - "EventName": "BUS_ACCESS_ST", - "BriefDescription": "Bus access - Write" - }, - {, - "EventCode": "0xC0", - "EventName": "EXT_MEM_REQ", - "BriefDescription": "External memory request" - }, - {, - "EventCode": "0xC1", - "EventName": "EXT_MEM_REQ_NC", - "BriefDescription": "Non-cacheable external memory request" + { + "ArchStdEvent": "BUS_ACCESS_WR", } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json index 11baad6344b9..5dfbec43c9f9 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json @@ -1,27 +1,27 @@ [ - {, - "EventCode": "0xC2", - "EventName": "PREFETCH_LINEFILL", - "BriefDescription": "Linefill because of prefetch" + { + "EventCode": "0xC2", + "EventName": "PREFETCH_LINEFILL", + "BriefDescription": "Linefill because of prefetch" }, - {, - "EventCode": "0xC3", - "EventName": "PREFETCH_LINEFILL_DROP", - "BriefDescription": "Instruction Cache Throttle occurred" + { + "EventCode": "0xC3", + "EventName": "PREFETCH_LINEFILL_DROP", + "BriefDescription": "Instruction Cache Throttle occurred" }, - {, - "EventCode": "0xC4", - "EventName": "READ_ALLOC_ENTER", - "BriefDescription": "Entering read allocate mode" + { + "EventCode": "0xC4", + "EventName": "READ_ALLOC_ENTER", + "BriefDescription": "Entering read allocate mode" }, - {, - "EventCode": "0xC5", - "EventName": "READ_ALLOC", - "BriefDescription": "Read allocate mode" + { + "EventCode": "0xC5", + "EventName": "READ_ALLOC", + "BriefDescription": "Read allocate mode" }, - {, - "EventCode": "0xC8", - "EventName": "EXT_SNOOP", - "BriefDescription": "SCU Snooped data from another CPU for this CPU" + { + "EventCode": "0xC8", + "EventName": "EXT_SNOOP", + "BriefDescription": "SCU Snooped data from another CPU for this CPU" } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json index 480d9f7460ab..25ae642ba381 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json @@ -1,20 +1,10 @@ [ - {, - "EventCode": "0x60", - "EventName": "BUS_ACCESS_LD", - "BriefDescription": "Bus access - Read" - }, - {, - "EventCode": "0x61", - "EventName": "BUS_ACCESS_ST", - "BriefDescription": "Bus access - Write" - }, - {, + { "EventCode": "0xC0", "EventName": "EXT_MEM_REQ", "BriefDescription": "External memory request" }, - {, + { "EventCode": "0xC1", "EventName": "EXT_MEM_REQ_NC", "BriefDescription": "Non-cacheable external memory request" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json index 73a22402d003..6cc6cbd7bf0b 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json @@ -1,32 +1,28 @@ [ - {, - "EventCode": "0x86", - "EventName": "EXC_IRQ", - "BriefDescription": "Exception taken, IRQ" + { + "ArchStdEvent": "EXC_IRQ", }, - {, - "EventCode": "0x87", - "EventName": "EXC_FIQ", - "BriefDescription": "Exception taken, FIQ" + { + "ArchStdEvent": "EXC_FIQ", }, - {, - "EventCode": "0xC6", - "EventName": "PRE_DECODE_ERR", - "BriefDescription": "Pre-decode error" + { + "EventCode": "0xC6", + "EventName": "PRE_DECODE_ERR", + "BriefDescription": "Pre-decode error" }, - {, - "EventCode": "0xD0", - "EventName": "L1I_CACHE_ERR", - "BriefDescription": "L1 Instruction Cache (data or tag) memory error" + { + "EventCode": "0xD0", + "EventName": "L1I_CACHE_ERR", + "BriefDescription": "L1 Instruction Cache (data or tag) memory error" }, - {, - "EventCode": "0xD1", - "EventName": "L1D_CACHE_ERR", - "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable" + { + "EventCode": "0xD1", + "EventName": "L1D_CACHE_ERR", + "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable" }, - {, - "EventCode": "0xD2", - "EventName": "TLB_ERR", - "BriefDescription": "TLB memory error" + { + "EventCode": "0xD2", + "EventName": "TLB_ERR", + "BriefDescription": "TLB memory error" } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json index 3149fb90555a..f45a6b5d0025 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json @@ -1,50 +1,50 @@ [ - {, + { "EventCode": "0xC7", "EventName": "STALL_SB_FULL", "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full" }, - {, + { "EventCode": "0xE0", "EventName": "OTHER_IQ_DEP_STALL", "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error" }, - {, + { "EventCode": "0xE1", "EventName": "IC_DEP_STALL", "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed" }, - {, + { "EventCode": "0xE2", "EventName": "IUTLB_DEP_STALL", "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed" }, - {, + { "EventCode": "0xE3", "EventName": "DECODE_DEP_STALL", "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed" }, - {, + { "EventCode": "0xE4", "EventName": "OTHER_INTERLOCK_STALL", "BriefDescription": "Cycles there is an interlock other than Advanced SIMD/Floating-point instructions or load/store instruction" }, - {, + { "EventCode": "0xE5", "EventName": "AGU_DEP_STALL", "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU" }, - {, + { "EventCode": "0xE6", "EventName": "SIMD_DEP_STALL", "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation." }, - {, + { "EventCode": "0xE7", "EventName": "LD_DEP_STALL", "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss" }, - {, + { "EventCode": "0xE8", "EventName": "ST_DEP_STALL", "BriefDescription": "Cycles there is a stall in the Wr stage because of a store" From 3d4caec1600e0bf34600a7b700599a20df03629e Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 8 Mar 2018 18:58:36 +0800 Subject: [PATCH 23/47] perf vendor events arm64: add HiSilicon hip08 JSON file This patch adds the HiSilicon hip08 JSON file. This platform follows the ARMv8 recommended IMPLEMENTATION DEFINED events, where applicable. Signed-off-by: John Garry Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ganapatrao Kulkarni Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1520506716-197429-12-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/hisilicon/hip08/core-imp-def.json | 122 ++++++++++++++++++ tools/perf/pmu-events/arch/arm64/mapfile.csv | 1 + 2 files changed, 123 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json new file mode 100644 index 000000000000..9f0f15d15f75 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json @@ -0,0 +1,122 @@ +[ + { + "ArchStdEvent": "L1D_CACHE_RD", + }, + { + "ArchStdEvent": "L1D_CACHE_WR", + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD", + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR", + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM", + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN", + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL", + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD", + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR", + }, + { + "ArchStdEvent": "L1D_TLB_RD", + }, + { + "ArchStdEvent": "L1D_TLB_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_RD", + }, + { + "ArchStdEvent": "L2D_CACHE_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD", + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM", + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN", + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL", + }, + { + "PublicDescription": "Level 1 instruction cache prefetch access count", + "EventCode": "0x102e", + "EventName": "L1I_CACHE_PRF", + "BriefDescription": "L1I cache prefetch access count", + }, + { + "PublicDescription": "Level 1 instruction cache miss due to prefetch access count", + "EventCode": "0x102f", + "EventName": "L1I_CACHE_PRF_REFILL", + "BriefDescription": "L1I cache miss due to prefetch access count", + }, + { + "PublicDescription": "Instruction queue is empty", + "EventCode": "0x1043", + "EventName": "IQ_IS_EMPTY", + "BriefDescription": "Instruction queue is empty", + }, + { + "PublicDescription": "Instruction fetch stall cycles", + "EventCode": "0x1044", + "EventName": "IF_IS_STALL", + "BriefDescription": "Instruction fetch stall cycles", + }, + { + "PublicDescription": "Instructions can receive, but not send", + "EventCode": "0x2014", + "EventName": "FETCH_BUBBLE", + "BriefDescription": "Instructions can receive, but not send", + }, + { + "PublicDescription": "Prefetch request from LSU", + "EventCode": "0x6013", + "EventName": "PRF_REQ", + "BriefDescription": "Prefetch request from LSU", + }, + { + "PublicDescription": "Hit on prefetched data", + "EventCode": "0x6014", + "EventName": "HIT_ON_PRF", + "BriefDescription": "Hit on prefetched data", + }, + { + "PublicDescription": "Cycles of that the number of issuing micro operations are less than 4", + "EventCode": "0x7001", + "EventName": "EXE_STALL_CYCLE", + "BriefDescription": "Cycles of that the number of issue ups are less than 4", + }, + { + "PublicDescription": "No any micro operation is issued and meanwhile any load operation is not resolved", + "EventCode": "0x7004", + "EventName": "MEM_STALL_ANYLOAD", + "BriefDescription": "No any micro operation is issued and meanwhile any load operation is not resolved", + }, + { + "PublicDescription": "No any micro operation is issued and meanwhile there is any load operation missing L1 cache and pending data refill", + "EventCode": "0x7006", + "EventName": "MEM_STALL_L1MISS", + "BriefDescription": "No any micro operation is issued and meanwhile there is any load operation missing L1 cache and pending data refill", + }, + { + "PublicDescription": "No any micro operation is issued and meanwhile there is any load operation missing both L1 and L2 cache and pending data refill from L3 cache", + "EventCode": "0x7007", + "EventName": "MEM_STALL_L2MISS", + "BriefDescription": "No any micro operation is issued and meanwhile there is any load operation missing both L1 and L2 cache and pending data refill from L3 cache", + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index cf14e23b6404..8f11aeb003a9 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -14,3 +14,4 @@ #Family-model,Version,Filename,EventType 0x00000000410fd03[[:xdigit:]],v1,arm/cortex-a53,core 0x00000000420f5160,v1,cavium/thunderx2,core +0x00000000480fd010,v1,hisilicon/hip08,core From fca32340a5e8b896f57d41fd94b8b1701df25eb1 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 8 Mar 2018 15:57:35 +0100 Subject: [PATCH 24/47] perf stat: Fix core dump when flag T is used Executing command 'perf stat -T -- ls' dumps core on x86 and s390. Here is the call back chain (done on x86): # gdb ./perf .... (gdb) r stat -T -- ls ... Program received signal SIGSEGV, Segmentation fault. 0x00007ffff56d1963 in vasprintf () from /lib64/libc.so.6 (gdb) where #0 0x00007ffff56d1963 in vasprintf () from /lib64/libc.so.6 #1 0x00007ffff56ae484 in asprintf () from /lib64/libc.so.6 #2 0x00000000004f1982 in __parse_events_add_pmu (parse_state=0x7fffffffd580, list=0xbfb970, name=0xbf3ef0 "cpu", head_config=0xbfb930, auto_merge_stats=false) at util/parse-events.c:1233 #3 0x00000000004f1c8e in parse_events_add_pmu (parse_state=0x7fffffffd580, list=0xbfb970, name=0xbf3ef0 "cpu", head_config=0xbfb930) at util/parse-events.c:1288 #4 0x0000000000537ce3 in parse_events_parse (_parse_state=0x7fffffffd580, scanner=0xbf4210) at util/parse-events.y:234 #5 0x00000000004f2c7a in parse_events__scanner (str=0x6b66c0 "task-clock,{instructions,cycles,cpu/cycles-t/,cpu/tx-start/}", parse_state=0x7fffffffd580, start_token=258) at util/parse-events.c:1673 #6 0x00000000004f2e23 in parse_events (evlist=0xbe9990, str=0x6b66c0 "task-clock,{instructions,cycles,cpu/cycles-t/,cpu/tx-start/}", err=0x0) at util/parse-events.c:1713 #7 0x000000000044e137 in add_default_attributes () at builtin-stat.c:2281 #8 0x000000000044f7b5 in cmd_stat (argc=1, argv=0x7fffffffe3b0) at builtin-stat.c:2828 #9 0x00000000004c8b0f in run_builtin (p=0xab01a0 , argc=4, argv=0x7fffffffe3b0) at perf.c:297 #10 0x00000000004c8d7c in handle_internal_command (argc=4, argv=0x7fffffffe3b0) at perf.c:349 #11 0x00000000004c8ece in run_argv (argcp=0x7fffffffe20c, argv=0x7fffffffe200) at perf.c:393 #12 0x00000000004c929c in main (argc=4, argv=0x7fffffffe3b0) at perf.c:537 (gdb) It turns out that a NULL pointer is referenced. Here are the function calls: ... cmd_stat() +---> add_default_attributes() +---> parse_events(evsel_list, transaction_attrs, NULL); 3rd parameter set to NULL Function parse_events(xx, xx, struct parse_events_error *err) dives into a bison generated scanner and creates parser state information for it first: struct parse_events_state parse_state = { .list = LIST_HEAD_INIT(parse_state.list), .idx = evlist->nr_entries, .error = err, <--- NULL POINTER !!! .evlist = evlist, }; Now various functions inside the bison scanner are called to end up in __parse_events_add_pmu(struct parse_events_state *parse_state, ..) with first parameter being a pointer to above structure definition. Now the PMU event name is not found (because being executed in a VM) and this function tries to create an error message with asprintf(&parse_state->error.str, ....) which references a NULL pointer and dumps core. Fix this by providing a pointer to the necessary error information instead of NULL. Technically only the else part is needed to avoid the core dump, just lets be safe... Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180308145735.64717-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 0fa9ea3a6d92..f5c454855908 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2331,11 +2331,16 @@ static int add_default_attributes(void) return 0; if (transaction_run) { + struct parse_events_error errinfo; + if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) - err = parse_events(evsel_list, transaction_attrs, NULL); + err = parse_events(evsel_list, transaction_attrs, + &errinfo); else - err = parse_events(evsel_list, transaction_limited_attrs, NULL); + err = parse_events(evsel_list, + transaction_limited_attrs, + &errinfo); if (err) { fprintf(stderr, "Cannot set up transaction events\n"); return -1; From 39ce7fb31530c6d4648919e03e16c5e9286a5940 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 7 Mar 2018 16:24:30 +0100 Subject: [PATCH 25/47] perf report: Show zero counters as well in 'perf report --stat' When recently using 'perf report --stat' it was not clear to me from the output whether a particular statistics field (LOST_SAMPLES) was not present, or just zero: fomalhaut:~> perf report --stat Aggregated stats: TOTAL events: 495984 MMAP events: 85 COMM events: 3389 EXIT events: 1605 THROTTLE events: 2 UNTHROTTLE events: 2 FORK events: 3377 SAMPLE events: 472629 MMAP2 events: 14753 FINISHED_ROUND events: 139 THREAD_MAP events: 1 CPU_MAP events: 1 TIME_CONV events: 1 I had to check the output several times to ascertain that I'm not misreading the output, that the field didn't change and that I didn't misremember the name. In fact I had to look into the perf source to make sure that zero fields are indeed not shown. With the patch applied: fomalhaut:~> perf report --stat Aggregated stats: TOTAL events: 495984 MMAP events: 85 LOST events: 0 COMM events: 3389 EXIT events: 1605 THROTTLE events: 2 UNTHROTTLE events: 2 FORK events: 3377 READ events: 0 SAMPLE events: 472629 MMAP2 events: 14753 AUX events: 0 ITRACE_START events: 0 LOST_SAMPLES events: 0 SWITCH events: 0 SWITCH_CPU_WIDE events: 0 NAMESPACES events: 0 ATTR events: 0 EVENT_TYPE events: 0 TRACING_DATA events: 0 BUILD_ID events: 0 FINISHED_ROUND events: 139 ID_INDEX events: 0 AUXTRACE_INFO events: 0 AUXTRACE events: 0 AUXTRACE_ERROR events: 0 THREAD_MAP events: 1 CPU_MAP events: 1 STAT_CONFIG events: 0 STAT events: 0 STAT_ROUND events: 0 EVENT_UPDATE events: 0 TIME_CONV events: 1 FEATURE events: 0 It's pretty clear at a glance that LOST_SAMPLES is present but zero. The original output can still be gotten via: fomalhaut:~> perf report --stat | grep -vw 0 Aggregated stats: TOTAL events: 495984 MMAP events: 85 COMM events: 3389 EXIT events: 1605 THROTTLE events: 2 UNTHROTTLE events: 2 FORK events: 3377 SAMPLE events: 472629 MMAP2 events: 14753 FINISHED_ROUND events: 139 THREAD_MAP events: 1 CPU_MAP events: 1 TIME_CONV events: 1 So I don't think there's any real loss in functionality. Signed-off-by: Ingo Molnar Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20180307152430.7e5h7e657b7bgd7q@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 25dd1e0ecc58..6832fcb2e6ff 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -840,15 +840,11 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp) for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { const char *name; - if (stats->nr_events[i] == 0) - continue; - name = perf_event__name(i); if (!strcmp(name, "UNKNOWN")) continue; - ret += fprintf(fp, "%16s events: %10d\n", name, - stats->nr_events[i]); + ret += fprintf(fp, "%16s events: %10d\n", name, stats->nr_events[i]); } return ret; From a8685f088819d21cd5aea5de4c184de427c3625d Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Wed, 7 Mar 2018 16:38:03 +0530 Subject: [PATCH 26/47] perf vendor events arm64: Enable JSON events for ThunderX2 B0 There is MIDR change on ThunderX2 B0, adding an entry to mapfile to enable JSON events for B0. Signed-off-by: Ganapatrao Kulkarni Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Ganapatrao Kulkarni Cc: Jayachandran C Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Peter Zijlstra Cc: Robert Richter Cc: William Cohen Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20180307110803.32418-1-ganapatrao.kulkarni@cavium.com [ Fixup wrt recent patchset by John Garry ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/arm64/mapfile.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 8f11aeb003a9..f03e26ecb658 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -14,4 +14,5 @@ #Family-model,Version,Filename,EventType 0x00000000410fd03[[:xdigit:]],v1,arm/cortex-a53,core 0x00000000420f5160,v1,cavium/thunderx2,core +0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000480fd010,v1,hisilicon/hip08,core From 3d20c6246690219881786de10d2dda93f616d0ac Mon Sep 17 00:00:00 2001 From: Martin Vuille Date: Sun, 11 Feb 2018 16:24:20 -0500 Subject: [PATCH 27/47] perf unwind: Unwind with libdw doesn't take symfs into account Path passed to libdw for unwinding doesn't include symfs path if specified, so unwinding fails because ELF file is not found. Similar to unwinding with libunwind, pass symsrc_filename instead of long_name. If there is no symsrc_filename, fallback to long_name. Signed-off-by: Martin Vuille Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/r/20180211212420.18388-1-jpmv27@aim.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/unwind-libdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 1e9c974faf67..8e969f28cc59 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -50,7 +50,7 @@ static int __report_module(struct addr_location *al, u64 ip, if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, - dso->long_name, -1, al->map->start, + (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start, false); return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; From cff17205d6bd363703034510a84d66044aff176e Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Mon, 12 Mar 2018 19:25:57 +0800 Subject: [PATCH 28/47] perf record: Avoid duplicate call of perf_default_config() We have brought perf_default_config to the very beginning at main(), so it no need to call perf_default_config() once more for most of config in perf-record but only for record.call-graph. Signed-off-by: Yisheng Xie Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1520853957-36106-2-git-send-email-xieyisheng1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b81494587120..d33103291b02 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1279,10 +1279,12 @@ static int perf_record_config(const char *var, const char *value, void *cb) return -1; return 0; } - if (!strcmp(var, "record.call-graph")) - var = "call-graph.record-mode"; /* fall-through */ + if (!strcmp(var, "record.call-graph")) { + var = "call-graph.record-mode"; + return perf_default_config(var, value, cb); + } - return perf_default_config(var, value, cb); + return 0; } struct clockid_map { From a3a4a3b37c9b911af4c375b2475cea0fd2b84d38 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Mon, 12 Mar 2018 19:25:56 +0800 Subject: [PATCH 29/47] perf top: Fix top.call-graph config option reading When trying to add the "call-graph" variable for top into the .perfconfig file, like: [top] call-graph = fp I that perf_top_config() do not parse this variable. Fix it by calling perf_default_config() when the top.call-graph variable is set. Signed-off-by: Yisheng Xie Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Fixes: b8cbb349061e ("perf config: Bring perf_default_config to the very beginning at main()") Link: http://lkml.kernel.org/r/1520853957-36106-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0a26b56afcc5..113c298ed38b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1223,8 +1223,10 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused) { - if (!strcmp(var, "top.call-graph")) - var = "call-graph.record-mode"; /* fall-through */ + if (!strcmp(var, "top.call-graph")) { + var = "call-graph.record-mode"; + return perf_default_config(var, value, cb); + } if (!strcmp(var, "top.children")) { symbol_conf.cumulate_callchain = perf_config_bool(var, value); return 0; From 5eab5a7ee032acaab3da8fc95a3614fec14687ac Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Mar 2018 10:43:02 +0100 Subject: [PATCH 30/47] perf llvm: Display eBPF compiling command in debug output In addition to template, display also the real compile command line with all the variables substituted. llvm compiling command template: $CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS ... llvm compiling command : /usr/bin/clang -D__KERNEL__ -D__NR_CPUS__=24 -DLINUX_VERSION_CODE=0x41000 ... Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180312094313.18738-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/llvm-utils.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 4952b429caa7..1cca0a2fa641 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -433,6 +433,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, char serr[STRERR_BUFSIZE]; char *kbuild_dir = NULL, *kbuild_include_opts = NULL; const char *template = llvm_param.clang_bpf_cmd_template; + char *command_echo, *command_out; if (path[0] != '-' && realpath(path, abspath) == NULL) { err = errno; @@ -487,6 +488,16 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, (path[0] == '-') ? path : abspath); pr_debug("llvm compiling command template: %s\n", template); + + if (asprintf(&command_echo, "echo -n \"%s\"", template) < 0) + goto errout; + + err = read_from_pipe(command_echo, (void **) &command_out, NULL); + if (err) + goto errout; + + pr_debug("llvm compiling command : %s\n", command_out); + err = read_from_pipe(template, &obj_buf, &obj_buf_sz); if (err) { pr_err("ERROR:\tunable to compile %s\n", path); @@ -497,6 +508,8 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, goto errout; } + free(command_echo); + free(command_out); free(kbuild_dir); free(kbuild_include_opts); @@ -509,6 +522,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, *p_obj_buf_sz = obj_buf_sz; return 0; errout: + free(command_echo); free(kbuild_dir); free(kbuild_include_opts); free(obj_buf); From 26e4711fc8352252f474a02429d7495652c4aef7 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 12 Mar 2018 11:38:07 +0100 Subject: [PATCH 31/47] perf stat: Make function perf_stat_evsel_id_init static Function perf_stat_evsel_id_init() has global linkage but is only used in util/stat.c. Make it static. Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180312103807.45069-2-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 2 +- tools/perf/util/stat.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 32235657c1ac..a0061e0b0fad 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -92,7 +92,7 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { }; #undef ID -void perf_stat_evsel_id_init(struct perf_evsel *evsel) +static void perf_stat_evsel_id_init(struct perf_evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; int i; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2f44e386a0e8..8f56ba4fd258 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -128,8 +128,6 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, #define perf_stat_evsel__is(evsel, id) \ __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id) -void perf_stat_evsel_id_init(struct perf_evsel *evsel); - extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; From c192524e6fe8a4bd18f2549f9556b81ed9e05a86 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 12 Mar 2018 16:24:06 +0100 Subject: [PATCH 32/47] perf machine: Fix mmap name setup Leo reported broken -k option behavior. The reason is that we used symbol_conf.vmlinux_name as a source for mmap event name, but in fact it's a vmlinux path. Moving the symbol_conf.vmlinux_name check for both host and guest to the proper place and out of the machine__set_mmap_name function. Reported-by: Leo Yan Signed-off-by: Jiri Olsa Tested-by: Leo Yan Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: commit ("8c7f1bb37b29 perf machine: Move kernel mmap name into struct machine") Link: http://lkml.kernel.org/r/20180312152406.10141-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 43fbbee409ec..2eca8478e24f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -50,21 +50,13 @@ static void machine__threads_init(struct machine *machine) static int machine__set_mmap_name(struct machine *machine) { - if (machine__is_host(machine)) { - if (symbol_conf.vmlinux_name) - machine->mmap_name = strdup(symbol_conf.vmlinux_name); - else - machine->mmap_name = strdup("[kernel.kallsyms]"); - } else if (machine__is_default_guest(machine)) { - if (symbol_conf.default_guest_vmlinux_name) - machine->mmap_name = strdup(symbol_conf.default_guest_vmlinux_name); - else - machine->mmap_name = strdup("[guest.kernel.kallsyms]"); - } else { - if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]", - machine->pid) < 0) - machine->mmap_name = NULL; - } + if (machine__is_host(machine)) + machine->mmap_name = strdup("[kernel.kallsyms]"); + else if (machine__is_default_guest(machine)) + machine->mmap_name = strdup("[guest.kernel.kallsyms]"); + else if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]", + machine->pid) < 0) + machine->mmap_name = NULL; return machine->mmap_name ? 0 : -ENOMEM; } @@ -794,9 +786,15 @@ static struct dso *machine__get_kernel(struct machine *machine) struct dso *kernel; if (machine__is_host(machine)) { + if (symbol_conf.vmlinux_name) + vmlinux_name = symbol_conf.vmlinux_name; + kernel = machine__findnew_kernel(machine, vmlinux_name, "[kernel]", DSO_TYPE_KERNEL); } else { + if (symbol_conf.default_guest_vmlinux_name) + vmlinux_name = symbol_conf.default_guest_vmlinux_name; + kernel = machine__findnew_kernel(machine, vmlinux_name, "[guest.kernel]", DSO_TYPE_GUEST_KERNEL); From 10f354a36f9a9aa1b8bffe0abc1cd43822a85bcd Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 12 Mar 2018 18:14:50 +0530 Subject: [PATCH 33/47] perf test: Fix exit code for record+probe_libc_inet_pton.sh This fixes record+probe_libc_inet_pton.sh from always exiting with code 0 and making the test pass even if the perf script output does not match the expected pattern. The issue can be observed if this test is run with the verbose flags as shown below: 60: probe libc's inet_pton & backtrace it with ping : ... ping 19602 [006] 16988.413767: probe_libc:inet_pton: (7fff9a2c42e8) 1842e8 __GI___inet_pton (/usr/lib64/libc-2.26.so) 130db4 getaddrinfo (/usr/lib64/libc-2.26.so) FAIL: expected backtrace entry 3 ".*\(.*/bin/ping.*\)$" got "" test child finished with 0 ... probe libc's inet_pton & backtrace it with ping: Ok Signed-off-by: Sandipan Das Cc: Jiri Olsa Cc: Naveen N. Rao Fixes: e07d585e2454 ("perf tests: Switch trace+probe_libc_inet_pton to use record") Link: http://lkml.kernel.org/r/20180312124450.30371-1-sandipan@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 52c3ee701a89..1ecc1f0ff84a 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -47,7 +47,10 @@ trace_libc_inet_pton_backtrace() { [ -z "${expected[$idx]}" ] && break done - rm -f $file + # If any statements are executed from this point onwards, + # the exit code of the last among these will be reflected + # in err below. If the exit code is 0, the test will pass + # even if the perf script output does not match. } # Check for IPv6 interface existence From 6810158d526e483868e519befff407b91e76b3db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 14 Mar 2018 10:34:11 -0300 Subject: [PATCH 34/47] perf annotate: Use asprintf when formatting objdump command line We were using a local buffer with an arbitrary size, that would have to get increased to avoid truncation as warned by gcc 8: util/annotate.c: In function 'symbol__disassemble': util/annotate.c:1488:4: error: '%s' directive output may be truncated writing up to 4095 bytes into a region of size between 3966 and 8086 [-Werror=format-truncation=] "%s %s%s --start-address=0x%016" PRIx64 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ util/annotate.c:1498:20: symfs_filename, symfs_filename); ~~~~~~~~~~~~~~ util/annotate.c:1490:50: note: format string is defined here " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", ^~ In file included from /usr/include/stdio.h:861, from util/color.h:5, from util/sort.h:8, from util/annotate.c:14: /usr/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 116 or more bytes (assuming 8331) into a destination of size 8192 return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ __bos (__s), __fmt, __va_arg_pack ()); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ So switch to asprintf, that will make sure enough space is available. Cc: Adrian Hunter Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-qagoy2dmbjpc9gdnaj0r3mml@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bc3302da702b..ddad87f34a68 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1427,7 +1427,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct map *map = args->map; struct dso *dso = map->dso; - char command[PATH_MAX * 2]; + char *command; FILE *file; char symfs_filename[PATH_MAX]; struct kcore_extract kce; @@ -1468,7 +1468,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) strcpy(symfs_filename, tmp); } - snprintf(command, sizeof(command), + err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", @@ -1481,12 +1481,17 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) symbol_conf.annotate_src ? "-S" : "", symfs_filename, symfs_filename); + if (err < 0) { + pr_err("Failure allocating memory for the command to run\n"); + goto out_remove_tmp; + } + pr_debug("Executing: %s\n", command); err = -1; if (pipe(stdout_fd) < 0) { pr_err("Failure creating the pipe to run %s\n", command); - goto out_remove_tmp; + goto out_free_command; } pid = fork(); @@ -1513,7 +1518,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) * If we were using debug info should retry with * original binary. */ - goto out_remove_tmp; + goto out_free_command; } nline = 0; @@ -1541,6 +1546,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) fclose(file); err = 0; +out_free_command: + free(command); out_remove_tmp: close(stdout_fd[0]); @@ -1554,7 +1561,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) out_close_stdout: close(stdout_fd[1]); - goto out_remove_tmp; + goto out_free_command; } static void calc_percent(struct sym_hist *hist, From 66790bc8e1f51831d73691954ae0b430bde614ad Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 14 Mar 2018 17:33:54 +0000 Subject: [PATCH 35/47] perf tests: Fix out of bounds access on array fd when cnt is 100 Currently when cnt is 100 an array bounds overflow occurs on the assignment of fd[cnt]. Fix this by performing the bounds check on cnt before writing to fd. Detected by cppcheck: tools/perf/tests/bp_account.c:115: (warning) Either the condition 'cnt==100' is redundant or the array 'fd[100]' is accessed at index 100, which is out of bounds. Signed-off-by: Colin King Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Fixes: 032db28e5fa3 ("perf tests: Add breakpoint accounting/modify test") Link: http://lkml.kernel.org/r/20180314173354.11250-1-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bp_account.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index 2f75fa0c4fef..9e88d7608951 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -107,16 +107,14 @@ static int detect_cnt(bool is_x) int fd[100], cnt = 0, i; while (1) { - fd[cnt] = __event(is_x, addr, &attr); - - if (fd[cnt] < 0) - break; - if (cnt == 100) { pr_debug("way too many debug registers, fix the test\n"); return 0; } + fd[cnt] = __event(is_x, addr, &attr); + if (fd[cnt] < 0) + break; cnt++; } From a2015516c5c0be932a69e1d3405c2fb03b4eacf1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 Mar 2018 10:22:04 +0100 Subject: [PATCH 36/47] perf record: Synthesize features before events in pipe mode We need to synthesize events first, because some features works on top of them (on report side). Signed-off-by: Jiri Olsa Tested-by: Stephane Eranian Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180314092205.23291-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d33103291b02..22ebeb92ac51 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -754,13 +754,10 @@ static int record__synthesize(struct record *rec, bool tail) return 0; if (data->is_pipe) { - err = perf_event__synthesize_features( - tool, session, rec->evlist, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize features.\n"); - return err; - } - + /* + * We need to synthesize events first, because some + * features works on top of them (on report side). + */ err = perf_event__synthesize_attrs(tool, session, process_synthesized_event); if (err < 0) { @@ -768,6 +765,13 @@ static int record__synthesize(struct record *rec, bool tail) goto out; } + err = perf_event__synthesize_features(tool, session, rec->evlist, + process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize features.\n"); + return err; + } + if (have_tracepoints(&rec->evlist->entries)) { /* * FIXME err <= 0 here actually means that From 57b5de463925b9fbd1eff56a38a510495ac9c2c0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 Mar 2018 10:22:05 +0100 Subject: [PATCH 37/47] perf report: Support forced leader feature in pipe mode Stephane reported a problem with forced leader in pipe mode, where report does not force the group output. The reason is that we don't force the leader in pipe mode. This patch adds HEADER_LAST_FEATURE mark to have a point where we have all events and features received, and force the group if requested. $ perf record --group -e '{cycles, instructions}' -o - kill | perf report -i - --group SNIP # Overhead Command Shared Object Symbol # ................ ....... ................ ....................... # 28.36% 0.00% kill libc-2.25.so [.] __unregister_atfork 26.32% 0.00% kill libc-2.25.so [.] _dl_addr 26.10% 0.00% kill ld-2.25.so [.] _dl_relocate_object 17.32% 0.00% kill ld-2.25.so [.] __tunables_init 1.70% 0.01% kill [unknown] [k] 0xffffffffafa01a40 0.20% 0.00% kill ld-2.25.so [.] _start 0.00% 48.77% kill ld-2.25.so [.] do_lookup_x 0.00% 42.97% kill libc-2.25.so [.] _IO_getline 0.00% 6.35% kill ld-2.25.so [.] strcmp 0.00% 1.71% kill ld-2.25.so [.] _dl_sysdep_start 0.00% 0.19% kill ld-2.25.so [.] _dl_start Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Stephane Eranian Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180314092205.23291-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 57 ++++++++++++++++++++++++++++--------- tools/perf/util/header.c | 11 ++++++- 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 971ccba85464..91da12975642 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -68,6 +68,7 @@ struct report { bool header; bool header_only; bool nonany_branch_mode; + bool group_set; int max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; @@ -193,6 +194,45 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, return err; } +/* + * Events in data file are not collect in groups, but we still want + * the group display. Set the artificial group and set the leader's + * forced_leader flag to notify the display code. + */ +static void setup_forced_leader(struct report *report, + struct perf_evlist *evlist) +{ + if (report->group_set && !evlist->nr_groups) { + struct perf_evsel *leader = perf_evlist__first(evlist); + + perf_evlist__set_leader(evlist); + leader->forced_leader = true; + } +} + +static int process_feature_event(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session __maybe_unused) +{ + struct report *rep = container_of(tool, struct report, tool); + + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(tool, event, session); + + if (event->feat.feat_id != HEADER_LAST_FEATURE) { + pr_err("failed: wrong feature ID: %" PRIu64 "\n", + event->feat.feat_id); + return -1; + } + + /* + * All features are received, we can force the + * group if needed. + */ + setup_forced_leader(rep, session->evlist); + return 0; +} + static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -940,7 +980,6 @@ int cmd_report(int argc, const char **argv) "perf report []", NULL }; - bool group_set = false; struct report report = { .tool = { .sample = process_sample_event, @@ -958,7 +997,7 @@ int cmd_report(int argc, const char **argv) .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, - .feature = perf_event__process_feature, + .feature = process_feature_event, .ordered_events = true, .ordering_requires_timestamps = true, }, @@ -1060,7 +1099,7 @@ int cmd_report(int argc, const char **argv) "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), - OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &group_set, + OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set, "Show event group information together"), OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", "use branch records for per branch histogram filling", @@ -1177,17 +1216,7 @@ int cmd_report(int argc, const char **argv) has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); - /* - * Events in data file are not collect in groups, but we still want - * the group display. Set the artificial group and set the leader's - * forced_leader flag to notify the display code. - */ - if (group_set && !session->evlist->nr_groups) { - struct perf_evsel *leader = perf_evlist__first(session->evlist); - - perf_evlist__set_leader(session->evlist); - leader->forced_leader = true; - } + setup_forced_leader(&report, session->evlist); if (itrace_synth_opts.last_branch) has_br_stack = true; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e14b3f7c7212..121df1683c36 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3415,8 +3415,17 @@ int perf_event__synthesize_features(struct perf_tool *tool, return ret; } } + + /* Send HEADER_LAST_FEATURE mark. */ + fe = ff.buf; + fe->feat_id = HEADER_LAST_FEATURE; + fe->header.type = PERF_RECORD_HEADER_FEATURE; + fe->header.size = sizeof(*fe); + + ret = process(tool, ff.buf, NULL, NULL); + free(ff.buf); - return 0; + return ret; } int perf_event__process_feature(struct perf_tool *tool, From 9749adc3b2a23c91b2eda8758ff0c650d731aa2f Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 13 Mar 2018 12:33:29 -0500 Subject: [PATCH 38/47] perf vendor events: Update POWER9 events Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Sukadev Bhattiprolu Link: https://lkml.kernel.org/r/20180313224647.GA22960@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power9/cache.json | 25 -- .../arch/powerpc/power9/frontend.json | 10 - .../arch/powerpc/power9/marked.json | 5 - .../arch/powerpc/power9/memory.json | 5 - .../pmu-events/arch/powerpc/power9/other.json | 241 ++++++++++++------ .../arch/powerpc/power9/pipeline.json | 50 ++-- .../pmu-events/arch/powerpc/power9/pmc.json | 5 - .../arch/powerpc/power9/translation.json | 10 +- 8 files changed, 178 insertions(+), 173 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power9/cache.json b/tools/perf/pmu-events/arch/powerpc/power9/cache.json index 7945c5196c43..851072105054 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/cache.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/cache.json @@ -19,11 +19,6 @@ "EventName": "PM_CMPLU_STALL_FXU", "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes" }, - {, - "EventCode": "0x1D15C", - "EventName": "PM_MRK_DTLB_MISS_1G", - "BriefDescription": "Marked Data TLB reload (after a miss) page size 2M. Implies radix translation was used" - }, {, "EventCode": "0x4D12A", "EventName": "PM_MRK_DATA_FROM_RL4_CYC", @@ -79,21 +74,6 @@ "EventName": "PM_THRESH_EXC_4096", "BriefDescription": "Threshold counter exceed a count of 4096" }, - {, - "EventCode": "0x3D156", - "EventName": "PM_MRK_DTLB_MISS_64K", - "BriefDescription": "Marked Data TLB Miss page size 64K" - }, - {, - "EventCode": "0x4C15E", - "EventName": "PM_MRK_DTLB_MISS_16M", - "BriefDescription": "Marked Data TLB Miss page size 16M" - }, - {, - "EventCode": "0x2D15E", - "EventName": "PM_MRK_DTLB_MISS_16G", - "BriefDescription": "Marked Data TLB Miss page size 16G" - }, {, "EventCode": "0x3F14A", "EventName": "PM_MRK_DPTEG_FROM_RMEM", @@ -123,10 +103,5 @@ "EventCode": "0x1002A", "EventName": "PM_CMPLU_STALL_LARX", "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied" - }, - {, - "EventCode": "0x1C058", - "EventName": "PM_DTLB_MISS_16G", - "BriefDescription": "Data TLB Miss page size 16G" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json index bd8361b5fd6a..f9fa84b16fb5 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json @@ -154,11 +154,6 @@ "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC", "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load" }, - {, - "EventCode": "0x3C056", - "EventName": "PM_DTLB_MISS_64K", - "BriefDescription": "Data TLB Miss page size 64K" - }, {, "EventCode": "0x30060", "EventName": "PM_TM_TRANS_RUN_INST", @@ -344,11 +339,6 @@ "EventName": "PM_MRK_LARX_FIN", "BriefDescription": "Larx finished" }, - {, - "EventCode": "0x4C056", - "EventName": "PM_DTLB_MISS_16M", - "BriefDescription": "Data TLB Miss page size 16M" - }, {, "EventCode": "0x1003A", "EventName": "PM_CMPLU_STALL_LSU_FIN", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/marked.json b/tools/perf/pmu-events/arch/powerpc/power9/marked.json index 22f9f32060a8..b1954c38bab1 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/marked.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/marked.json @@ -529,11 +529,6 @@ "EventName": "PM_L1_ICACHE_RELOADED_ALL", "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch" }, - {, - "EventCode": "0x4003C", - "EventName": "PM_DISP_HELD_SYNC_HOLD", - "BriefDescription": "Cycles in which dispatch is held because of a synchronizing instruction in the pipeline" - }, {, "EventCode": "0x3003C", "EventName": "PM_CMPLU_STALL_NESTED_TEND", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/memory.json b/tools/perf/pmu-events/arch/powerpc/power9/memory.json index 9960d1c0dd44..2e2ebc700c74 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/memory.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/memory.json @@ -44,11 +44,6 @@ "EventName": "PM_LD_CMPL", "BriefDescription": "count of Loads completed" }, - {, - "EventCode": "0x2D156", - "EventName": "PM_MRK_DTLB_MISS_4K", - "BriefDescription": "Marked Data TLB Miss page size 4k" - }, {, "EventCode": "0x4C042", "EventName": "PM_DATA_FROM_L3", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json index 5ce312973f1e..48cf4f920b3f 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json @@ -69,6 +69,11 @@ "EventName": "PM_THRD_PRIO_0_1_CYC", "BriefDescription": "Cycles thread running at priority level 0 or 1" }, + {, + "EventCode": "0x4C054", + "EventName": "PM_DERAT_MISS_16G_1G", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G (hpt mode) or 1G (radix mode)" + }, {, "EventCode": "0x2084", "EventName": "PM_FLUSH_HB_RESTORE_CYC", @@ -107,12 +112,12 @@ {, "EventCode": "0x360B2", "EventName": "PM_L3_GRP_GUESS_WRONG_LOW", - "BriefDescription": "Initial scope=group (GS or NNS) but data from outside group (far or rem). Prediction too Low" + "BriefDescription": "Prefetch scope predictor selected GS or NNS, but was wrong because scope was LNS" }, {, "EventCode": "0x168A6", "EventName": "PM_TM_CAM_OVERFLOW", - "BriefDescription": "L3 TM cam overflow during L2 co of SC" + "BriefDescription": "L3 TM CAM is full when a L2 castout of TM_SC line occurs. Line is pushed to memory" }, {, "EventCode": "0xE8B0", @@ -149,11 +154,6 @@ "EventName": "PM_ISU3_ISS_HOLD_ALL", "BriefDescription": "All ISU rejects" }, - {, - "EventCode": "0x460A6", - "EventName": "PM_RD_FORMING_SC", - "BriefDescription": "Read forming SC" - }, {, "EventCode": "0x468A0", "EventName": "PM_L3_PF_OFF_CHIP_MEM", @@ -187,7 +187,7 @@ {, "EventCode": "0x368A6", "EventName": "PM_SNP_TM_HIT_T", - "BriefDescription": "Snp TM sthit T/Tn/Te" + "BriefDescription": "TM snoop that is a store hits line in L3 in T, Tn or Te state (shared modified)" }, {, "EventCode": "0x3001A", @@ -204,6 +204,11 @@ "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD_CYC", "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load" }, + {, + "EventCode": "0xF0B4", + "EventName": "PM_DC_PREF_CONS_ALLOC", + "BriefDescription": "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch. The sum of this pair subtracted from the total number of allocs will give the total allocs in normal phase" + }, {, "EventCode": "0xF894", "EventName": "PM_LSU3_L1_CAM_CANCEL", @@ -227,7 +232,12 @@ {, "EventCode": "0x468A6", "EventName": "PM_RD_CLEARING_SC", - "BriefDescription": "Read clearing SC" + "BriefDescription": "Core TM load hits line in L3 in TM_SC state and causes it to be invalidated" + }, + {, + "EventCode": "0xD0B0", + "EventName": "PM_HWSYNC", + "BriefDescription": "" }, {, "EventCode": "0x168B0", @@ -264,6 +274,11 @@ "EventName": "PM_DC_PREF_HW_ALLOC", "BriefDescription": "Prefetch stream allocated by the hardware prefetch mechanism" }, + {, + "EventCode": "0xF0BC", + "EventName": "PM_LS2_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, {, "EventCode": "0xD0AC", "EventName": "PM_SRQ_SYNC_CYC", @@ -274,6 +289,11 @@ "EventName": "PM_MRK_INST_FROM_L3MISS", "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet" }, + {, + "EventCode": "0x58A8", + "EventName": "PM_DECODE_HOLD_ICT_FULL", + "BriefDescription": "Counts the number of cycles in which the IFU was not able to decode and transmit one or more instructions because all itags were in use. This means the ICT is full for this thread" + }, {, "EventCode": "0x26082", "EventName": "PM_L2_IC_INV", @@ -364,6 +384,16 @@ "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC", "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load" }, + {, + "EventCode": "0xF888", + "EventName": "PM_LSU1_STORE_REJECT", + "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" + }, + {, + "EventCode": "0xC098", + "EventName": "PM_LS2_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, {, "EventCode": "0x20058", "EventName": "PM_DARQ1_10_12_ENTRIES", @@ -372,7 +402,7 @@ {, "EventCode": "0x360A6", "EventName": "PM_SNP_TM_HIT_M", - "BriefDescription": "Snp TM st hit M/Mu" + "BriefDescription": "TM snoop that is a store hits line in L3 in M or Mu state (exclusive modified)" }, {, "EventCode": "0x5898", @@ -395,9 +425,9 @@ "BriefDescription": "A data line was written to the L1 due to a hardware or software prefetch" }, {, - "EventCode": "0xF888", - "EventName": "PM_LSU1_STORE_REJECT", - "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" + "EventCode": "0x2608E", + "EventName": "PM_TM_LD_CONF", + "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)" }, {, "EventCode": "0x1D144", @@ -422,7 +452,7 @@ {, "EventCode": "0x26884", "EventName": "PM_DSIDE_MRU_TOUCH", - "BriefDescription": "D-side L2 MRU touch sent to L2" + "BriefDescription": "D-side L2 MRU touch commands sent to the L2" }, {, "EventCode": "0x30134", @@ -439,6 +469,16 @@ "EventName": "PM_EAT_FORCE_MISPRED", "BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicated target to validate. This is a rare case that may occur when the EAT is full and a branch is issued" }, + {, + "EventCode": "0xC094", + "EventName": "PM_LS0_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, + {, + "EventCode": "0xF8BC", + "EventName": "PM_LS3_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, {, "EventCode": "0x460AE", "EventName": "PM_L3_P2_CO_RTY", @@ -492,7 +532,7 @@ {, "EventCode": "0xC880", "EventName": "PM_LS1_LD_VECTOR_FIN", - "BriefDescription": "" + "BriefDescription": "LS1 finished load vector op" }, {, "EventCode": "0x2894", @@ -514,6 +554,11 @@ "EventName": "PM_MRK_LSU_DERAT_MISS", "BriefDescription": "Marked derat reload (miss) for any page size" }, + {, + "EventCode": "0x160A0", + "EventName": "PM_L3_PF_MISS_L3", + "BriefDescription": "L3 PF missed in L3" + }, {, "EventCode": "0x1C04A", "EventName": "PM_DATA_FROM_RL2L3_SHR", @@ -564,11 +609,21 @@ "EventName": "PM_L2_LOC_GUESS_WRONG", "BriefDescription": "L2 guess local (LNS) and guess was not correct (ie data not on chip)" }, + {, + "EventCode": "0xC888", + "EventName": "PM_LSU_DTLB_MISS_64K", + "BriefDescription": "Data TLB Miss page size 64K" + }, {, "EventCode": "0xE0A4", "EventName": "PM_TMA_REQ_L2", "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding" }, + {, + "EventCode": "0xC088", + "EventName": "PM_LSU_DTLB_MISS_4K", + "BriefDescription": "Data TLB Miss page size 4K" + }, {, "EventCode": "0x3C042", "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT", @@ -602,7 +657,7 @@ {, "EventCode": "0x26084", "EventName": "PM_L2_RCLD_DISP_FAIL_OTHER", - "BriefDescription": "All I-or-D side load dispatch attempts for this thread that failed due to reason other than address collision (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread that failed due to reasons other than an address collision conflicts with an L2 machines (e.g. Read-Claim/Snoop machine not available)" }, {, "EventCode": "0x101E4", @@ -647,12 +702,12 @@ {, "EventCode": "0x46080", "EventName": "PM_L2_DISP_ALL_L2MISS", - "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All successful D-side-Ld/St or I-side-instruction-fetch dispatches for this thread that were an L2 miss" }, {, - "EventCode": "0x160A0", - "EventName": "PM_L3_PF_MISS_L3", - "BriefDescription": "L3 PF missed in L3" + "EventCode": "0xF8B8", + "EventName": "PM_LS1_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" }, {, "EventCode": "0x408C", @@ -667,7 +722,7 @@ {, "EventCode": "0x160B2", "EventName": "PM_L3_LOC_GUESS_CORRECT", - "BriefDescription": "initial scope=node/chip (LNS) and data from local node (local) (pred successful) - always PFs only" + "BriefDescription": "Prefetch scope predictor selected LNS and was correct" }, {, "EventCode": "0x48B4", @@ -767,7 +822,7 @@ {, "EventCode": "0x36082", "EventName": "PM_L2_LD_DISP", - "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All successful D-side-Ld or I-side-instruction-fetch dispatches for this thread" }, {, "EventCode": "0xF8B0", @@ -787,7 +842,7 @@ {, "EventCode": "0x16884", "EventName": "PM_L2_RCLD_DISP_FAIL_ADDR", - "BriefDescription": "All I-od-D side load dispatch attempts for this thread that failed due to address collision with RC/CO/SN/SQ machine (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread that failed due to an address collision conflicts with an L2 machines already working on this line (e.g. ld-hit-stq or Read-claim/Castout/Snoop machines)" }, {, "EventCode": "0x460A0", @@ -829,6 +884,11 @@ "EventName": "PM_IC_PREF_REQ", "BriefDescription": "Instruction prefetch requests" }, + {, + "EventCode": "0xC898", + "EventName": "PM_LS3_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, {, "EventCode": "0x488C", "EventName": "PM_IC_PREF_WRITE", @@ -837,7 +897,7 @@ {, "EventCode": "0xF89C", "EventName": "PM_XLATE_MISS", - "BriefDescription": "The LSU requested a line from L2 for translation. It may be satisfied from any source beyond L2. Includes speculative instructions" + "BriefDescription": "The LSU requested a line from L2 for translation. It may be satisfied from any source beyond L2. Includes speculative instructions. Includes instruction, prefetch and demand" }, {, "EventCode": "0x14158", @@ -849,10 +909,15 @@ "EventName": "PM_MRK_DATA_FROM_L31_SHR_CYC", "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load" }, + {, + "EventCode": "0xC88C", + "EventName": "PM_LSU_DTLB_MISS_16G_1G", + "BriefDescription": "Data TLB Miss page size 16G (HPT) or 1G (Radix)" + }, {, "EventCode": "0x268A6", "EventName": "PM_TM_RST_SC", - "BriefDescription": "TM-snp rst RM SC" + "BriefDescription": "TM snoop hits line in L3 that is TM_SC state and causes it to be invalidated" }, {, "EventCode": "0x468A4", @@ -917,7 +982,7 @@ {, "EventCode": "0x46086", "EventName": "PM_L2_SN_M_RD_DONE", - "BriefDescription": "SNP dispatched for a read and was M (true M)" + "BriefDescription": "Snoop dispatched for a read and was M (true M)" }, {, "EventCode": "0x40154", @@ -979,15 +1044,10 @@ "EventName": "PM_LINK_STACK_CORRECT", "BriefDescription": "Link stack predicts right address" }, - {, - "EventCode": "0x4C05A", - "EventName": "PM_DTLB_MISS_1G", - "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used" - }, {, "EventCode": "0x36886", "EventName": "PM_L2_SN_SX_I_DONE", - "BriefDescription": "SNP dispatched and went from Sx to Ix" + "BriefDescription": "Snoop dispatched and went from Sx to Ix" }, {, "EventCode": "0x4E04A", @@ -999,11 +1059,6 @@ "EventName": "PM_MRK_DATA_FROM_DL4_CYC", "BriefDescription": "Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load" }, - {, - "EventCode": "0x2608E", - "EventName": "PM_TM_LD_CONF", - "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)" - }, {, "EventCode": "0x4080", "EventName": "PM_INST_FROM_L1", @@ -1037,7 +1092,7 @@ {, "EventCode": "0x260A6", "EventName": "PM_NON_TM_RST_SC", - "BriefDescription": "Non-TM snp rst TM SC" + "BriefDescription": "Non-TM snoop hits line in L3 that is TM_SC state and causes it to be invalidated" }, {, "EventCode": "0x3608A", @@ -1064,11 +1119,6 @@ "EventName": "PM_FLUSH_MPRED", "BriefDescription": "Branch mispredict flushes. Includes target and address misprecition" }, - {, - "EventCode": "0x508C", - "EventName": "PM_SHL_CREATED", - "BriefDescription": "Store-Hit-Load Table Entry Created" - }, {, "EventCode": "0x1504C", "EventName": "PM_IPTEG_FROM_LL4", @@ -1107,7 +1157,7 @@ {, "EventCode": "0x2608A", "EventName": "PM_ISIDE_DISP_FAIL_ADDR", - "BriefDescription": "All I-side dispatch attempts for this thread that failed due to a addr collision with another machine (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread that failed due to an address collision conflict with an L2 machine already working on this line (e.g. ld-hit-stq or RC/CO/SN machines)" }, {, "EventCode": "0x50B4", @@ -1180,9 +1230,9 @@ "BriefDescription": "Number of stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed" }, {, - "EventCode": "0xE0B8", - "EventName": "PM_LS2_TM_DISALLOW", - "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it" + "EventCode": "0xD8AC", + "EventName": "PM_LWSYNC", + "BriefDescription": "" }, {, "EventCode": "0x2094", @@ -1209,6 +1259,11 @@ "EventName": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL", "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)" }, + {, + "EventCode": "0xC894", + "EventName": "PM_LS1_UNALIGNED_LD", + "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, {, "EventCode": "0x360A2", "EventName": "PM_L3_L2_CO_HIT", @@ -1292,7 +1347,7 @@ {, "EventCode": "0xC084", "EventName": "PM_LS2_LD_VECTOR_FIN", - "BriefDescription": "" + "BriefDescription": "LS2 finished load vector op" }, {, "EventCode": "0x1608E", @@ -1344,6 +1399,11 @@ "EventName": "PM_SN_USAGE", "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each SN machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running" }, + {, + "EventCode": "0x36084", + "EventName": "PM_L2_RCST_DISP", + "BriefDescription": "All D-side store dispatch attempts for this thread" + }, {, "EventCode": "0x46084", "EventName": "PM_L2_RCST_DISP_FAIL_OTHER", @@ -1354,11 +1414,6 @@ "EventName": "PM_DC_PREF_STRIDED_CONF", "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software." }, - {, - "EventCode": "0x36084", - "EventName": "PM_L2_RCST_DISP", - "BriefDescription": "All D-side store dispatch attempts for this thread" - }, {, "EventCode": "0x45054", "EventName": "PM_FMA_CMPL", @@ -1372,7 +1427,7 @@ {, "EventCode": "0x36080", "EventName": "PM_L2_INST", - "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)" + "BriefDescription": "All successful I-side-instruction-fetch (e.g. i-demand, i-prefetch) dispatches for this thread" }, {, "EventCode": "0x3504C", @@ -1387,7 +1442,7 @@ {, "EventCode": "0x1688A", "EventName": "PM_ISIDE_DISP", - "BriefDescription": "All I-side dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread" }, {, "EventCode": "0x468AA", @@ -1419,6 +1474,11 @@ "EventName": "PM_LSU2_TM_L1_HIT", "BriefDescription": "Load tm hit in L1" }, + {, + "EventCode": "0xE0B8", + "EventName": "PM_LS2_TM_DISALLOW", + "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it" + }, {, "EventCode": "0x44044", "EventName": "PM_INST_FROM_L31_ECO_MOD", @@ -1467,7 +1527,7 @@ {, "EventCode": "0x36086", "EventName": "PM_L2_RC_ST_DONE", - "BriefDescription": "RC did store to line that was Tx or Sx" + "BriefDescription": "Read-claim machine did store to line that was in Tx or Sx (Tagged or Shared state)" }, {, "EventCode": "0xE8AC", @@ -1499,6 +1559,11 @@ "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT", "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request" }, + {, + "EventCode": "0x460A6", + "EventName": "PM_RD_FORMING_SC", + "BriefDescription": "Doesn't occur" + }, {, "EventCode": "0x35042", "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT", @@ -1527,7 +1592,7 @@ {, "EventCode": "0x36882", "EventName": "PM_L2_LD_HIT", - "BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All successful D-side-Ld or I-side-instruction-fetch dispatches for this thread that were L2 hits" }, {, "EventCode": "0x168AC", @@ -1554,11 +1619,6 @@ "EventName": "PM_PROBE_NOP_DISP", "BriefDescription": "ProbeNops dispatched" }, - {, - "EventCode": "0x58A8", - "EventName": "PM_DECODE_HOLD_ICT_FULL", - "BriefDescription": "Counts the number of cycles in which the IFU was not able to decode and transmit one or more instructions because all itags were in use. This means the ICT is full for this thread" - }, {, "EventCode": "0x10052", "EventName": "PM_GRP_PUMP_MPRED_RTY", @@ -1572,7 +1632,7 @@ {, "EventCode": "0x2688A", "EventName": "PM_ISIDE_DISP_FAIL_OTHER", - "BriefDescription": "All I-side dispatch attempts for this thread that failed due to a reason other than addrs collision (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread that failed due to reasons other than an address collision conflict with an L2 machine (e.g. no available RC/CO machines)" }, {, "EventCode": "0x2001A", @@ -1652,12 +1712,12 @@ {, "EventCode": "0x46880", "EventName": "PM_ISIDE_MRU_TOUCH", - "BriefDescription": "I-side L2 MRU touch sent to L2 for this thread" + "BriefDescription": "I-side L2 MRU touch sent to L2 for this thread I-side L2 MRU touch commands sent to the L2 for this thread" }, {, - "EventCode": "0x1C05C", - "EventName": "PM_DTLB_MISS_2M", - "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used" + "EventCode": "0x508C", + "EventName": "PM_SHL_CREATED", + "BriefDescription": "Store-Hit-Load Table Entry Created" }, {, "EventCode": "0x50B8", @@ -1672,7 +1732,7 @@ {, "EventCode": "0x268B2", "EventName": "PM_L3_LOC_GUESS_WRONG", - "BriefDescription": "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low" + "BriefDescription": "Prefetch scope predictor selected LNS, but was wrong" }, {, "EventCode": "0x36088", @@ -1684,6 +1744,11 @@ "EventName": "PM_L3_P2_PF_RTY", "BriefDescription": "L3 PF received retry port 2, every retry counted" }, + {, + "EventCode": "0xD8B0", + "EventName": "PM_PTESYNC", + "BriefDescription": "" + }, {, "EventCode": "0x26086", "EventName": "PM_CO_TM_SC_FOOTPRINT", @@ -1739,6 +1804,11 @@ "EventName": "PM_L2_ST_MISS", "BriefDescription": "All successful D-Side Store dispatches that were an L2 miss for this thread" }, + {, + "EventCode": "0xF8B4", + "EventName": "PM_DC_PREF_XCONS_ALLOC", + "BriefDescription": "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch" + }, {, "EventCode": "0x35048", "EventName": "PM_IPTEG_FROM_DL2L3_SHR", @@ -1782,7 +1852,7 @@ {, "EventCode": "0x460B2", "EventName": "PM_L3_SYS_GUESS_WRONG", - "BriefDescription": "Initial scope=system (VGS or RNS) but data from local or near. Prediction too high" + "BriefDescription": "Prefetch scope predictor selected VGS or RNS, but was wrong" }, {, "EventCode": "0x58B8", @@ -1799,11 +1869,6 @@ "EventName": "PM_TM_TABORT_TRECLAIM", "BriefDescription": "Completion time tabortnoncd, tabortcd, treclaim" }, - {, - "EventCode": "0x4C054", - "EventName": "PM_DERAT_MISS_16G", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G" - }, {, "EventCode": "0x268A0", "EventName": "PM_L3_CO_L31", @@ -1862,7 +1927,7 @@ {, "EventCode": "0x368B2", "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH", - "BriefDescription": "Initial scope=group (GS or NNS) but data from local node. Prediction too high" + "BriefDescription": "Prefetch scope predictor selected GS or NNS, but was wrong because scope was VGS or RNS" }, {, "EventCode": "0xE8BC", @@ -1897,7 +1962,7 @@ {, "EventCode": "0x260B2", "EventName": "PM_L3_SYS_GUESS_CORRECT", - "BriefDescription": "Initial scope=system (VGS or RNS) and data from outside group (far or rem)(pred successful)" + "BriefDescription": "Prefetch scope predictor selected VGS or RNS and was correct" }, {, "EventCode": "0x1D146", @@ -1914,6 +1979,11 @@ "EventName": "PM_L2_GROUP_PUMP", "BriefDescription": "RC requests that were on group (aka nodel) pump attempts" }, + {, + "EventCode": "0xC08C", + "EventName": "PM_LSU_DTLB_MISS_16M_2M", + "BriefDescription": "Data TLB Miss page size 16M (HPT) or 2M (Radix)" + }, {, "EventCode": "0x16080", "EventName": "PM_L2_LD", @@ -1927,7 +1997,7 @@ {, "EventCode": "0xC080", "EventName": "PM_LS0_LD_VECTOR_FIN", - "BriefDescription": "" + "BriefDescription": "LS0 finished load vector op" }, {, "EventCode": "0x368B0", @@ -1999,6 +2069,11 @@ "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL", "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time" }, + {, + "EventCode": "0xF0B8", + "EventName": "PM_LS0_UNALIGNED_ST", + "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + }, {, "EventCode": "0x20132", "EventName": "PM_MRK_DFU_FIN", @@ -2007,7 +2082,7 @@ {, "EventCode": "0x160A6", "EventName": "PM_TM_SC_CO", - "BriefDescription": "L3 castout TM SC line" + "BriefDescription": "L3 castout of line that was StoreCopy (original value of speculatively written line) in a Transaction" }, {, "EventCode": "0xC8B0", @@ -2017,7 +2092,7 @@ {, "EventCode": "0x16084", "EventName": "PM_L2_RCLD_DISP", - "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" + "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread" }, {, "EventCode": "0x3F150", @@ -2122,12 +2197,12 @@ {, "EventCode": "0x46082", "EventName": "PM_L2_ST_DISP", - "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)" + "BriefDescription": "All successful D-side store dispatches for this thread" }, {, "EventCode": "0x36880", "EventName": "PM_L2_INST_MISS", - "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" + "BriefDescription": "All successful I-side-instruction-fetch (e.g. i-demand, i-prefetch) dispatches for this thread that were an L2 miss" }, {, "EventCode": "0xE084", @@ -2217,7 +2292,7 @@ {, "EventCode": "0xC884", "EventName": "PM_LS3_LD_VECTOR_FIN", - "BriefDescription": "" + "BriefDescription": "LS3 finished load vector op" }, {, "EventCode": "0x360A8", @@ -2242,7 +2317,7 @@ {, "EventCode": "0x168B2", "EventName": "PM_L3_GRP_GUESS_CORRECT", - "BriefDescription": "Initial scope=group (GS or NNS) and data from same group (near) (pred successful)" + "BriefDescription": "Prefetch scope predictor selected GS or NNS and was correct" }, {, "EventCode": "0x48A4", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json index 5af1abbe82c4..b4772f54a271 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json @@ -64,11 +64,6 @@ "EventName": "PM_DISP_HELD", "BriefDescription": "Dispatch Held" }, - {, - "EventCode": "0x3D154", - "EventName": "PM_MRK_DERAT_MISS_16M", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M" - }, {, "EventCode": "0x200F8", "EventName": "PM_EXT_INT", @@ -119,6 +114,11 @@ "EventName": "PM_MRK_DPTEG_FROM_L3_MEPF", "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, + {, + "EventCode": "0x4C15C", + "EventName": "PM_MRK_DERAT_MISS_16G_1G", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G (hpt mode) and 1G (radix mode)" + }, {, "EventCode": "0x10024", "EventName": "PM_PMC5_OVERFLOW", @@ -154,11 +154,6 @@ "EventName": "PM_ICT_NOSLOT_IC_MISS", "BriefDescription": "Ict empty for this thread due to Icache Miss" }, - {, - "EventCode": "0x3D152", - "EventName": "PM_MRK_DERAT_MISS_1G", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation" - }, {, "EventCode": "0x4F14A", "EventName": "PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE", @@ -184,11 +179,6 @@ "EventName": "PM_MRK_DPTEG_FROM_L2_NO_CONFLICT", "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, - {, - "EventCode": "0x2C05A", - "EventName": "PM_DERAT_MISS_1G", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation" - }, {, "EventCode": "0x1F058", "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L2", @@ -239,11 +229,6 @@ "EventName": "PM_DTLB_MISS", "BriefDescription": "Data PTEG reload" }, - {, - "EventCode": "0x2D152", - "EventName": "PM_MRK_DERAT_MISS_2M", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation" - }, {, "EventCode": "0x2C046", "EventName": "PM_DATA_FROM_RL2L3_MOD", @@ -289,6 +274,11 @@ "EventName": "PM_CMPLU_STALL_DFU", "BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Not qualified by multicycle" }, + {, + "EventCode": "0x3C054", + "EventName": "PM_DERAT_MISS_16M_2M", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M (HPT mode) or 2M (Radix mode)" + }, {, "EventCode": "0x4C04C", "EventName": "PM_DATA_FROM_DMEM", @@ -359,11 +349,6 @@ "EventName": "PM_INST_FROM_MEMORY", "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)" }, - {, - "EventCode": "0x1C05A", - "EventName": "PM_DERAT_MISS_2M", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation" - }, {, "EventCode": "0x30024", "EventName": "PM_PMC6_OVERFLOW", @@ -374,6 +359,11 @@ "EventName": "PM_BRU_FIN", "BriefDescription": "Branch Instruction Finished" }, + {, + "EventCode": "0x3D154", + "EventName": "PM_MRK_DERAT_MISS_16M_2M", + "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M (hpt mode) or 2M (radix mode)" + }, {, "EventCode": "0x30020", "EventName": "PM_PMC2_REWIND", @@ -409,11 +399,6 @@ "EventName": "PM_MRK_DPTEG_FROM_L31_MOD", "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" }, - {, - "EventCode": "0x4C15C", - "EventName": "PM_MRK_DERAT_MISS_16G", - "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G" - }, {, "EventCode": "0x14052", "EventName": "PM_INST_GRP_PUMP_MPRED_RTY", @@ -444,11 +429,6 @@ "EventName": "PM_IC_DEMAND_CYC", "BriefDescription": "Icache miss demand cycles" }, - {, - "EventCode": "0x3C054", - "EventName": "PM_DERAT_MISS_16M", - "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M" - }, {, "EventCode": "0x2D14E", "EventName": "PM_MRK_DATA_FROM_L21_SHR", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json index d0b89f930567..8b3b0f3be664 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json @@ -9,11 +9,6 @@ "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH", "BriefDescription": "Local memory above threshold for LSU medium" }, - {, - "EventCode": "0x2C056", - "EventName": "PM_DTLB_MISS_4K", - "BriefDescription": "Data TLB Miss page size 4k" - }, {, "EventCode": "0x40118", "EventName": "PM_MRK_DCACHE_RELOAD_INTV", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/translation.json b/tools/perf/pmu-events/arch/powerpc/power9/translation.json index bc8e03d7a6b0..b27642676244 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/translation.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/translation.json @@ -29,11 +29,6 @@ "EventName": "PM_ST_FIN", "BriefDescription": "Store finish count. Includes speculative activity" }, - {, - "EventCode": "0x44042", - "EventName": "PM_INST_FROM_L3", - "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)" - }, {, "EventCode": "0x1504A", "EventName": "PM_IPTEG_FROM_RL2L3_SHR", @@ -124,6 +119,11 @@ "EventName": "PM_PMC1_SAVED", "BriefDescription": "PMC1 Rewind Value saved" }, + {, + "EventCode": "0x44042", + "EventName": "PM_INST_FROM_L3", + "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)" + }, {, "EventCode": "0x200FE", "EventName": "PM_DATA_FROM_L2MISS", From 699db111058798bcc9f92a93767062905a561bef Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Tue, 13 Mar 2018 20:31:13 +0800 Subject: [PATCH 39/47] perf mmap: Discard head in overwrite_rb_find_range() In overwrite mode, start will be set to head in perf_mmap__read_init(). Therefore, there is no need to set the start one more time in overwrite_rb_find_range() and *start can be used as head instead of passing head to overwrite_rb_find_range(). Signed-off-by: Yisheng Xie Reviewed-by: Kan Liang Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1520944274-37001-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 074c4fd3b67e..38ca3ffb9d61 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -199,19 +199,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) return 0; } -static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) { struct perf_event_header *pheader; - u64 evt_head = head; + u64 evt_head = *start; int size = mask + 1; - pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); - pheader = (struct perf_event_header *)(buf + (head & mask)); - *start = head; + pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); + pheader = (struct perf_event_header *)(buf + (*start & mask)); while (true) { - if (evt_head - head >= (unsigned int)size) { + if (evt_head - *start >= (unsigned int)size) { pr_debug("Finished reading overwrite ring buffer: rewind\n"); - if (evt_head - head > (unsigned int)size) + if (evt_head - *start > (unsigned int)size) evt_head -= pheader->size; *end = evt_head; return 0; @@ -262,7 +261,7 @@ int perf_mmap__read_init(struct perf_mmap *md) * Backward ring buffer is full. We still have a chance to read * most of data from it. */ - if (overwrite_rb_find_range(data, md->mask, head, &md->start, &md->end)) + if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end)) return -EINVAL; } From a08f6dd4190e90dc7b013435acb66770f117e8b0 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Tue, 13 Mar 2018 20:31:14 +0800 Subject: [PATCH 40/47] perf debug: Avoid setting 'quiet' to 'true' unnecessarily When using --quiet to disable messages, we will set the 'quiet' variable to 'true' first, then check that variable to decide whether we need to call perf_quiet_option(), so no need to set 'quiet' to 'true' once more in perf_quiet_option(). Signed-off-by: Yisheng Xie Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1520944274-37001-2-git-send-email-xieyisheng1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index f3a71db83947..3d6459626c2a 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -232,7 +232,6 @@ int perf_quiet_option(void) var++; } - quiet = true; return 0; } From 77f18153c080855e1c3fb520ca31a4e61530121d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 19 Mar 2018 09:29:01 +0100 Subject: [PATCH 41/47] perf tools: Fix snprint warnings for gcc 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc 8 we get new set of snprintf() warnings that breaks the compilation, one example: tests/mem.c: In function ‘check’: tests/mem.c:19:48: error: ‘%s’ directive output may be truncated writing \ up to 99 bytes into a region of size 89 [-Werror=format-truncation=] snprintf(failure, sizeof failure, "unexpected %s", out); The gcc docs says: To avoid the warning either use a bigger buffer or handle the function's return value which indicates whether or not its output has been truncated. Given that all these warnings are harmless, because the code either properly fails due to uncomplete file path or we don't care for truncated output at all, I'm changing all those snprintf() calls to scnprintf(), which actually 'checks' for the snprint return value so the gcc stays silent. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sergey Senozhatsky Link: http://lkml.kernel.org/r/20180319082902.4518-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 22 +++++++++++----------- tools/perf/tests/attr.c | 4 ++-- tools/perf/tests/mem.c | 2 +- tools/perf/tests/pmu.c | 2 +- tools/perf/util/cgroup.c | 2 +- tools/perf/util/parse-events.c | 4 ++-- tools/perf/util/pmu.c | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index cce926aeb0c0..313c42423393 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2674,8 +2674,8 @@ static int list_available_scripts(const struct option *opt __maybe_unused, } for_each_lang(scripts_path, scripts_dir, lang_dirent) { - snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent->d_name); + scnprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; @@ -2684,8 +2684,8 @@ static int list_available_scripts(const struct option *opt __maybe_unused, script_root = get_script_root(script_dirent, REPORT_SUFFIX); if (script_root) { desc = script_desc__findnew(script_root); - snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent->d_name); + scnprintf(script_path, MAXPATHLEN, "%s/%s", + lang_path, script_dirent->d_name); read_script_info(desc, script_path); free(script_root); } @@ -2721,7 +2721,7 @@ static int check_ev_match(char *dir_name, char *scriptname, int match, len; FILE *fp; - sprintf(filename, "%s/bin/%s-record", dir_name, scriptname); + scnprintf(filename, MAXPATHLEN, "%s/bin/%s-record", dir_name, scriptname); fp = fopen(filename, "r"); if (!fp) @@ -2799,8 +2799,8 @@ int find_scripts(char **scripts_array, char **scripts_path_array) } for_each_lang(scripts_path, scripts_dir, lang_dirent) { - snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, - lang_dirent->d_name); + scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, + lang_dirent->d_name); #ifdef NO_LIBPERL if (strstr(lang_path, "perl")) continue; @@ -2855,8 +2855,8 @@ static char *get_script_path(const char *script_root, const char *suffix) return NULL; for_each_lang(scripts_path, scripts_dir, lang_dirent) { - snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent->d_name); + scnprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; @@ -2867,8 +2867,8 @@ static char *get_script_path(const char *script_root, const char *suffix) free(__script_root); closedir(lang_dir); closedir(scripts_dir); - snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent->d_name); + scnprintf(script_path, MAXPATHLEN, "%s/%s", + lang_path, script_dirent->d_name); return strdup(script_path); } free(__script_root); diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 97f64ad7fa08..05dfe11c2f9e 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -170,8 +170,8 @@ static int run_dir(const char *d, const char *perf) if (verbose > 0) vcnt++; - snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", - d, d, perf, vcnt, v); + scnprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", + d, d, perf, vcnt, v); return system(cmd) ? TEST_FAIL : TEST_OK; } diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c index 21952e1e6e6d..0f82ee9fd3f7 100644 --- a/tools/perf/tests/mem.c +++ b/tools/perf/tests/mem.c @@ -16,7 +16,7 @@ static int check(union perf_mem_data_src data_src, n = perf_mem__snp_scnprintf(out, sizeof out, &mi); n += perf_mem__lvl_scnprintf(out + n, sizeof out - n, &mi); - snprintf(failure, sizeof failure, "unexpected %s", out); + scnprintf(failure, sizeof failure, "unexpected %s", out); TEST_ASSERT_VAL(failure, !strcmp(string, out)); return 0; } diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 9abca267afa9..7bedf8608fdd 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -98,7 +98,7 @@ static char *test_format_dir_get(void) struct test_format *format = &test_formats[i]; FILE *file; - snprintf(name, PATH_MAX, "%s/%s", dir, format->name); + scnprintf(name, PATH_MAX, "%s/%s", dir, format->name); file = fopen(name, "w"); if (!file) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 78408f5c4bad..decb91f9da82 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -81,7 +81,7 @@ static int open_cgroup(const char *name) if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) return -1; - snprintf(path, PATH_MAX, "%s/%s", mnt, name); + scnprintf(path, PATH_MAX, "%s/%s", mnt, name); fd = open(path, O_RDONLY); if (fd == -1) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4e80ca320399..2fb0272146d8 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -206,8 +206,8 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) for_each_event(sys_dirent, evt_dir, evt_dirent) { - snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, - evt_dirent->d_name); + scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, + evt_dirent->d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) continue; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 1111d5bf15ca..064bdcb7bd78 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -351,7 +351,7 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) if (pmu_alias_info_file(name)) continue; - snprintf(path, PATH_MAX, "%s/%s", dir, name); + scnprintf(path, PATH_MAX, "%s/%s", dir, name); file = fopen(path, "r"); if (!file) { From b7a313d84e853049062011d78cb04b6decd12f5c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 19 Mar 2018 09:29:02 +0100 Subject: [PATCH 42/47] perf tools: Fix python extension build for gcc 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gcc 8 compiler won't compile the python extension code with the following errors (one example): python.c:830:15: error: cast between incompatible function types from \ ‘PyObject * (*)(struct pyrf_evsel *, PyObject *, PyObject *)’ \ uct _object * (*)(struct pyrf_evsel *, struct _object *, struct _object *)’} to \ ‘PyObject * (*)(PyObject *, PyObject *)’ {aka ‘struct _object * (*)(struct _objeuct \ _object *)’} [-Werror=cast-function-type] .ml_meth = (PyCFunction)pyrf_evsel__open, The problem with the PyMethodDef::ml_meth callback is that its type is determined based on the PyMethodDef::ml_flags value, which we set as METH_VARARGS | METH_KEYWORDS. That indicates that the callback is expecting an extra PyObject* arg, and is actually PyCFunctionWithKeywords type, but the base PyMethodDef::ml_meth type stays PyCFunction. Previous gccs did not find this, gcc8 now does. Fixing this by silencing this warning for python.c build. Commiter notes: Do not do that for CC=clang, as it breaks the build in some clang versions, like the ones in fedora up to fedora27: fedora:25:error: unknown warning option '-Wno-cast-function-type'; did you mean '-Wno-bad-function-cast'? [-Werror,-Wunknown-warning-option] fedora:26:error: unknown warning option '-Wno-cast-function-type'; did you mean '-Wno-bad-function-cast'? [-Werror,-Wunknown-warning-option] fedora:27:error: unknown warning option '-Wno-cast-function-type'; did you mean '-Wno-bad-function-cast'? [-Werror,-Wunknown-warning-option] # those have: clang version 3.9.1 (tags/RELEASE_391/final) The one in rawhide accepts that: clang version 6.0.0 (tags/RELEASE_600/final) Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sergey Senozhatsky Link: http://lkml.kernel.org/r/20180319082902.4518-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 6891635b50c3..001be4f9d3b9 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -28,6 +28,8 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +if cc != "clang": + cflags += ['-Wno-cast-function-type' ] src_perf = getenv('srctree') + '/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') From a8403912d04e2c8271653bb5b7f6294dc6d322ac Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Mar 2018 16:24:34 -0300 Subject: [PATCH 43/47] perf top: Document --ignore-vmlinux We've had this since 2013, document it. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Willy Tarreau Fixes: fc2be6968e99 ("perf symbols: Add new option --ignore-vmlinux for perf top") Link: https://lkml.kernel.org/n/tip-0jwfueooddwfsw9r603belxi@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index a039407d63b8..114fda12aa49 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -67,6 +67,9 @@ Default is to monitor all CPUS. --vmlinux=:: Path to vmlinux. Required for annotation functionality. +--ignore-vmlinux:: + Ignore vmlinux files. + -m :: --mmap-pages=:: Number of mmap data pages (must be a power of two) or size From 4c9cb2c2b4b5530717f74b2252f8cc4c45b2a918 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Mar 2018 13:28:09 -0300 Subject: [PATCH 44/47] perf annotate: Use ops->target.name when available for unresolved call targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a bug where when using 'perf annotate timerqueue_add' the target for its only routine called with the 'callq' instruction, 'rb_insert_color', doesn't get resolved from its address when parsing that 'callq' instruction. That symbol resolution works when using 'perf report --tui' and then doing annotation for 'timerqueue_add' from there, the vmlinux dso->symbols rb_tree somehow gets in a state that we can't find that address, that is a bug that has to be further investigated. But since the objdump output has the function name, i.e. the raw objdump disassembled line looks like: So, before: # perf annotate timerqueue_add │ mov %rbx,%rdi │ mov %rbx,(%rdx) │ → callq *ffffffff8184dc80 │ mov 0x8(%rbp),%rdx │ test %rdx,%rdx │ ↓ je 67 # perf report │ mov %rbx,%rdi │ mov %rbx,(%rdx) │ → callq rb_insert_color │ mov 0x8(%rbp),%rdx │ test %rdx,%rdx │ ↓ je 67 And after both look the same: # perf annotate timerqueue_add │ mov %rbx,%rdi │ mov %rbx,(%rdx) │ → callq rb_insert_color │ mov 0x8(%rbp),%rdx │ test %rdx,%rdx │ ↓ je 67 From 'perf report' one can annotate and navigate to that 'rb_insert_color' function, but not directly from 'perf annotate timerqueue_add', that remains to be investigated and fixed. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-nkktz6355rhqtq7o8atr8f8r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ddad87f34a68..535357c6ce02 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -238,6 +238,9 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size, if (ops->target.addr == 0) return ins__raw_scnprintf(ins, bf, size, ops); + if (ops->target.name) + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name); + return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr); } From d0461794a1dcaf552b507e23788777f718b736a1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 17 Mar 2018 21:52:25 +0900 Subject: [PATCH 45/47] perf probe: Use right type to access array elements Current 'perf probe' converts the type of array-elements incorrectly. It always converts the types as a pointer of array. This passes the "array" type DIE to the type converter so that it can get correct "element of array" type DIE from it. E.g. ==== $ cat hello.c #include void foo(int a[]) { printf("%d\n", a[1]); } void main() { int a[3] = {4, 5, 6}; printf("%d\n", a[0]); foo(a); } $ gcc -g hello.c -o hello $ perf probe -x ./hello -D "foo a[1]" ==== Without this fix, above outputs ==== p:probe_hello/foo /tmp/hello:0x4d3 a=+4(-8(%bp)):u64 ==== The "u64" means "int *", but a[1] is "int". With this, ==== p:probe_hello/foo /tmp/hello:0x4d3 a=+4(-8(%bp)):s32 ==== So, "int" correctly converted to "s32" Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Shuah Khan Cc: Steven Rostedt Cc: Tom Zanussi Cc: linux-kselftest@vger.kernel.org Cc: linux-trace-users@vger.kernel.org Fixes: b2a3c12b7442 ("perf probe: Support tracing an entry of array") Link: http://lkml.kernel.org/r/152129114502.31874.2474068470011496356.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index a5731de0e5eb..c37fbef1711d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -423,20 +423,20 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, pr_warning("Failed to get the type of %s.\n", varname); return -ENOENT; } - pr_debug2("Var real type: (%x)\n", (unsigned)dwarf_dieoffset(&type)); + pr_debug2("Var real type: %s (%x)\n", dwarf_diename(&type), + (unsigned)dwarf_dieoffset(&type)); tag = dwarf_tag(&type); if (field->name[0] == '[' && (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)) { - if (field->next) - /* Save original type for next field */ - memcpy(die_mem, &type, sizeof(*die_mem)); + /* Save original type for next field or type */ + memcpy(die_mem, &type, sizeof(*die_mem)); /* Get the type of this array */ if (die_get_real_type(&type, &type) == NULL) { pr_warning("Failed to get the type of %s.\n", varname); return -ENOENT; } - pr_debug2("Array real type: (%x)\n", + pr_debug2("Array real type: %s (%x)\n", dwarf_diename(&type), (unsigned)dwarf_dieoffset(&type)); if (tag == DW_TAG_pointer_type) { ref = zalloc(sizeof(struct probe_trace_arg_ref)); @@ -448,9 +448,6 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, *ref_ptr = ref; } ref->offset += dwarf_bytesize(&type) * field->index; - if (!field->next) - /* Save vr_die for converting types */ - memcpy(die_mem, vr_die, sizeof(*die_mem)); goto next; } else if (tag == DW_TAG_pointer_type) { /* Check the pointer and dereference */ From 854e55ad289ef8888e7991f0ada85d5846f5afb9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 15 Mar 2018 22:11:54 -0500 Subject: [PATCH 46/47] objtool, perf: Fix GCC 8 -Wrestrict error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with recent GCC 8 builds, objtool and perf fail to build with the following error: ../str_error_r.c: In function ‘str_error_r’: ../str_error_r.c:25:3: error: passing argument 1 to restrict-qualified parameter aliases with argument 5 [-Werror=restrict] snprintf(buf, buflen, "INTERNAL ERROR: strerror_r(%d, %p, %zd)=%d", errnum, buf, buflen, err); The code seems harmless, but there's probably no benefit in printing the 'buf' pointer in this situation anyway, so just remove it to make GCC happy. Reported-by: Laura Abbott Signed-off-by: Josh Poimboeuf Tested-by: Laura Abbott Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/r/20180316031154.juk2uncs7baffctp@treble Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/str_error_r.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/str_error_r.c b/tools/lib/str_error_r.c index d6d65537b0d9..6aad8308a0ac 100644 --- a/tools/lib/str_error_r.c +++ b/tools/lib/str_error_r.c @@ -22,6 +22,6 @@ char *str_error_r(int errnum, char *buf, size_t buflen) { int err = strerror_r(errnum, buf, buflen); if (err) - snprintf(buf, buflen, "INTERNAL ERROR: strerror_r(%d, %p, %zd)=%d", errnum, buf, buflen, err); + snprintf(buf, buflen, "INTERNAL ERROR: strerror_r(%d, [buf], %zd)=%d", errnum, buflen, err); return buf; } From 1cd618838b9703eabe4a75badf433382b12f6bef Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Mar 2018 10:51:00 -0300 Subject: [PATCH 47/47] perf tests bp_account: Fix build with clang-6 To shut up this compiler warning: CC /tmp/build/perf/tests/bp_account.o CC /tmp/build/perf/tests/task-exit.o CC /tmp/build/perf/tests/sw-clock.o tests/bp_account.c:106:20: error: pointer type mismatch ('int (*)(void)' and 'void *') [-Werror,-Wpointer-type-mismatch] void *addr = is_x ? test_function : (void *) &the_var; ^ ~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~ 1 error generated. Noticed with clang 6 on fedora rawhide. [perfbuilder@44490f0e7241 perf]$ clang -v clang version 6.0.0 (tags/RELEASE_600/final) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: /usr/bin Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-redhat-linux/8 Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/8 Selected GCC installation: /usr/bin/../lib/gcc/x86_64-redhat-linux/8 Candidate multilib: .;@m64 Candidate multilib: 32;@m32 Selected multilib: .;@m64 [perfbuilder@44490f0e7241 perf]$ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 032db28e5fa3 ("perf tests: Add breakpoint accounting/modify test") Link: https://lkml.kernel.org/n/tip-a3jnkzh4xam0l954de5tn66d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bp_account.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index 9e88d7608951..a20cbc445426 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -103,7 +103,7 @@ static int bp_accounting(int wp_cnt, int share) static int detect_cnt(bool is_x) { struct perf_event_attr attr; - void *addr = is_x ? test_function : (void *) &the_var; + void *addr = is_x ? (void *)test_function : (void *)&the_var; int fd[100], cnt = 0, i; while (1) {