From 3e2c1a67d616dbc1034bc39448cd5f4aa3bd3cca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jul 2016 19:37:52 +0200 Subject: [PATCH] perf/x86/intel: Clean up LBR state tracking The lbr_context logic confused me; it appears to me to try and do the same thing the pmu::sched_task() callback does now, but limited to per-task events. So rip it out. Afaict this should also improve performance, because I think the current code can end up doing lbr_reset() twice, once from the pmu::add() and then again from pmu::sched_task(), and MSR writes (all 3*16 of them) are expensive!! While thinking through the cases that need the reset it occured to me the first install of an event in an active context needs to reset the LBR (who knows what crap is in there), but detecting this case is somewhat hard. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 57 ++++++++++++++++++------------------ arch/x86/events/perf_event.h | 1 - 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 439b09d33856..fc6cf21c535e 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -380,7 +380,6 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx; /* @@ -390,31 +389,21 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) */ task_ctx = ctx ? ctx->task_ctx_data : NULL; if (task_ctx) { - if (sched_in) { + if (sched_in) __intel_pmu_lbr_restore(task_ctx); - cpuc->lbr_context = ctx; - } else { + else __intel_pmu_lbr_save(task_ctx); - } return; } /* - * When sampling the branck stack in system-wide, it may be - * necessary to flush the stack on context switch. This happens - * when the branch stack does not tag its entries with the pid - * of the current task. Otherwise it becomes impossible to - * associate a branch entry with a task. This ambiguity is more - * likely to appear when the branch stack supports priv level - * filtering and the user sets it to monitor only at the user - * level (which could be a useful measurement in system-wide - * mode). In that case, the risk is high of having a branch - * stack with branch from multiple tasks. - */ - if (sched_in) { + * Since a context switch can flip the address space and LBR entries + * are not tagged with an identifier, we need to wipe the LBR, even for + * per-cpu events. You simply cannot resolve the branches from the old + * address space. + */ + if (sched_in) intel_pmu_lbr_reset(); - cpuc->lbr_context = ctx; - } } static inline bool branch_user_callstack(unsigned br_sel) @@ -430,14 +419,6 @@ void intel_pmu_lbr_add(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - /* - * Reset the LBR stack if we changed task context to - * avoid data leaks. - */ - if (event->ctx->task && cpuc->lbr_context != event->ctx) { - intel_pmu_lbr_reset(); - cpuc->lbr_context = event->ctx; - } cpuc->br_sel = event->hw.branch_reg.reg; if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) { @@ -445,8 +426,28 @@ void intel_pmu_lbr_add(struct perf_event *event) task_ctx->lbr_callstack_users++; } - cpuc->lbr_users++; + /* + * Request pmu::sched_task() callback, which will fire inside the + * regular perf event scheduling, so that call will: + * + * - restore or wipe; when LBR-callstack, + * - wipe; otherwise, + * + * when this is from __perf_event_task_sched_in(). + * + * However, if this is from perf_install_in_context(), no such callback + * will follow and we'll need to reset the LBR here if this is the + * first LBR event. + * + * The problem is, we cannot tell these cases apart... but we can + * exclude the biggest chunk of cases by looking at + * event->total_time_running. An event that has accrued runtime cannot + * be 'new'. Conversely, a new event can get installed through the + * context switch path for the first time. + */ perf_sched_cb_inc(event->ctx->pmu); + if (!cpuc->lbr_users++ && !event->total_time_running) + intel_pmu_lbr_reset(); } void intel_pmu_lbr_del(struct perf_event *event) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index aa6ea5a84240..5874d8de1f8d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -201,7 +201,6 @@ struct cpu_hw_events { * Intel LBR bits */ int lbr_users; - void *lbr_context; struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; struct er_account *lbr_sel;