Peter Zijlstra says:

"Cleanup of the perf/kvm interaction."
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmHdvbkACgkQEsHwGGHe
 VUrX7w/9FwKUm0WlGcQIAOSdWk85N2qAVH3brYcQHNpTCVe68TOqTCrxCDrGgyUq
 2XnCOim99MUlnsVU6QRZqF4yJ8S1tGrc0COJ/qR4SGntucu0oYuDe2aMVq+mWUD7
 /IThA0oMRfhki9WwAyUuyCrXzk4blZdlrXyYIRMJGl9xeGNy3cvUtU8f68Kiy22E
 OcmQ/o9Etsr38dueAMU1KYEmgSTvG47rS8nfyRUu3QpJHbyLmRXH32PQrm3tduxS
 Bw3gMAH5vqq1UDZJ8ZvsPsO0vFX7dtnKEwEKz4qdtRWk9gi8oLGHIwIXC+VtNqpf
 mCmX33Jw8uFz9h3JhE84J0j/CgsWHoU6MOs0MOch4Tb69/BfCjQnw1enImhejG8q
 YEIDjJf/vgRNaw9PYshiTHT+EJTe9inT3S4eK/ynLRDUEslAqyWZZm7bUE/XrEDi
 yRyGIxry/hNZVvRkXT9QBw32fpgnIH2NAMPLEjJSGCRxT89Tfqz0aRDfacCuHTTh
 P8pAeiDuy/6RkDlQckOZJWOFFh2IHsykX2l3IJcHqVRqt4ob9b+SZB5qoH/Mv9qb
 MSAqdFUupYZFC+6XuPAeX5/Mo+wSkP+pYYSbWNxjUa0yNiYecOjE7/8T2SB2y6Mx
 lk2L0ypsZUYSmpHSfvOdPmf6ucj19/5B4+VCX6PQfcNJTnvvhTE=
 =tU5G
 -----END PGP SIGNATURE-----

Merge tag 'perf_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Borislav Petkov:
 "Cleanup of the perf/kvm interaction."

* tag 'perf_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Drop guest callback (un)register stubs
  KVM: arm64: Drop perf.c and fold its tiny bits of code into arm.c
  KVM: arm64: Hide kvm_arm_pmu_available behind CONFIG_HW_PERF_EVENTS=y
  KVM: arm64: Convert to the generic perf callbacks
  KVM: x86: Move Intel Processor Trace interrupt handler to vmx.c
  KVM: Move x86's perf guest info callbacks to generic KVM
  KVM: x86: More precisely identify NMI from guest when handling PMI
  KVM: x86: Drop current_vcpu for kvm_running_vcpu + kvm_arch_vcpu variable
  perf/core: Use static_call to optimize perf_guest_info_callbacks
  perf: Force architectures to opt-in to guest callbacks
  perf: Add wrappers for invoking guest callbacks
  perf/core: Rework guest callbacks to prepare for static_call support
  perf: Drop dead and useless guest "support" from arm, csky, nds32 and riscv
  perf: Stop pretending that perf can handle multiple guest callbacks
  KVM: x86: Register Processor Trace interrupt hook iff PT enabled in guest
  KVM: x86: Register perf callbacks after calling vendor's hardware_setup()
  perf: Protect perf_guest_cbs with RCU
This commit is contained in:
Linus Torvalds 2022-01-12 16:26:58 -08:00
commit 8e5b0adeea
29 changed files with 248 additions and 256 deletions

View File

@ -64,11 +64,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
{
struct frame_tail __user *tail;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
perf_callchain_store(entry, regs->ARM_pc);
if (!current->mm)
@ -100,20 +95,12 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
{
struct stackframe fr;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
arm_get_current_stackframe(regs, &fr);
walk_stackframe(&fr, callchain_trace, entry);
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
return perf_guest_cbs->get_guest_ip();
return instruction_pointer(regs);
}
@ -121,17 +108,10 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
{
int misc = 0;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
if (perf_guest_cbs->is_user_mode())
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
} else {
if (user_mode(regs))
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
}
return misc;
}

View File

@ -675,8 +675,15 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu);
int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa);
int kvm_perf_init(void);
int kvm_perf_teardown(void);
/*
* Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event,
* arrived in guest context. For arm64, any event that arrives while a vCPU is
* loaded is considered to be "in guest".
*/
static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
{
return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu;
}
long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);

View File

@ -102,7 +102,9 @@ KVM_NVHE_ALIAS(__stop___kvm_ex_table);
KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
/* PMU available static key */
#ifdef CONFIG_HW_PERF_EVENTS
KVM_NVHE_ALIAS(kvm_arm_pmu_available);
#endif
/* Position-independent library routines */
KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page);

View File

@ -102,7 +102,7 @@ compat_user_backtrace(struct compat_frame_tail __user *tail,
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
if (perf_guest_state()) {
/* We don't support guest os callchain now */
return;
}
@ -141,7 +141,7 @@ static bool callchain_trace(void *data, unsigned long pc)
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
if (perf_guest_state()) {
/* We don't support guest os callchain now */
return;
}
@ -151,18 +151,19 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
return perf_guest_cbs->get_guest_ip();
if (perf_guest_state())
return perf_guest_get_ip();
return instruction_pointer(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
unsigned int guest_state = perf_guest_state();
int misc = 0;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
if (perf_guest_cbs->is_user_mode())
if (guest_state) {
if (guest_state & PERF_GUEST_USER)
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;

View File

@ -39,6 +39,7 @@ menuconfig KVM
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
help
Support hosting virtualized guest machines.

View File

@ -12,7 +12,7 @@ obj-$(CONFIG_KVM) += hyp/
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
$(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \
arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o \

View File

@ -503,6 +503,13 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
return vcpu_mode_priv(vcpu);
}
#ifdef CONFIG_GUEST_PERF_EVENTS
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
return *vcpu_pc(vcpu);
}
#endif
/* Just ensure a guest exit from a particular CPU */
static void exit_vm_noop(void *info)
{
@ -1775,7 +1782,8 @@ static int init_subsystems(void)
if (err)
goto out;
kvm_perf_init();
kvm_register_perf_callbacks(NULL);
kvm_sys_reg_table_init();
out:
@ -2163,7 +2171,7 @@ int kvm_arch_init(void *opaque)
/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
kvm_perf_teardown();
kvm_unregister_perf_callbacks();
}
static int __init early_kvm_mode_cfg(char *arg)

View File

@ -1,59 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Based on the x86 implementation.
*
* Copyright (C) 2012 ARM Ltd.
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
#include <linux/perf_event.h>
#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
static int kvm_is_in_guest(void)
{
return kvm_get_running_vcpu() != NULL;
}
static int kvm_is_user_mode(void)
{
struct kvm_vcpu *vcpu;
vcpu = kvm_get_running_vcpu();
if (vcpu)
return !vcpu_mode_priv(vcpu);
return 0;
}
static unsigned long kvm_get_guest_ip(void)
{
struct kvm_vcpu *vcpu;
vcpu = kvm_get_running_vcpu();
if (vcpu)
return *vcpu_pc(vcpu);
return 0;
}
static struct perf_guest_info_callbacks kvm_guest_cbs = {
.is_in_guest = kvm_is_in_guest,
.is_user_mode = kvm_is_user_mode,
.get_guest_ip = kvm_get_guest_ip,
};
int kvm_perf_init(void)
{
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
}
int kvm_perf_teardown(void)
{
return perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
}

View File

@ -14,6 +14,8 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

View File

@ -88,10 +88,6 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
{
unsigned long fp = 0;
/* C-SKY does not support virtualization. */
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
return;
fp = regs->regs[4];
perf_callchain_store(entry, regs->pc);
@ -112,12 +108,6 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
{
struct stackframe fr;
/* C-SKY does not support virtualization. */
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
pr_warn("C-SKY does not support perf in guest mode!");
return;
}
fr.fp = regs->regs[4];
fr.lr = regs->lr;
walk_stackframe(&fr, entry);

View File

@ -1371,11 +1371,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry,
leaf_fp = 0;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
perf_callchain_store(entry, regs->ipc);
fp = regs->fp;
gp = regs->gp;
@ -1481,10 +1476,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
{
struct stackframe fr;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
fr.fp = regs->fp;
fr.lp = regs->lp;
fr.sp = regs->sp;
@ -1493,10 +1484,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
/* However, NDS32 does not support virtualization */
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
return perf_guest_cbs->get_guest_ip();
return instruction_pointer(regs);
}
@ -1504,18 +1491,10 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
{
int misc = 0;
/* However, NDS32 does not support virtualization */
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
if (perf_guest_cbs->is_user_mode())
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
} else {
if (user_mode(regs))
misc |= PERF_RECORD_MISC_USER;
else
misc |= PERF_RECORD_MISC_KERNEL;
}
return misc;
}

View File

@ -58,10 +58,6 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
{
unsigned long fp = 0;
/* RISC-V does not support perf in guest mode. */
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
return;
fp = regs->s0;
perf_callchain_store(entry, regs->epc);
@ -78,11 +74,5 @@ static bool fill_callchain(void *entry, unsigned long pc)
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
/* RISC-V does not support perf in guest mode. */
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
pr_warn("RISC-V does not support perf in guest mode!");
return;
}
walk_stackframe(NULL, regs, fill_callchain, entry);
}

View File

@ -2771,7 +2771,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
struct unwind_state state;
unsigned long addr;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
if (perf_guest_state()) {
/* TODO: We don't support guest os callchain now */
return;
}
@ -2874,7 +2874,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
struct stack_frame frame;
const struct stack_frame __user *fp;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
if (perf_guest_state()) {
/* TODO: We don't support guest os callchain now */
return;
}
@ -2951,18 +2951,19 @@ static unsigned long code_segment_base(struct pt_regs *regs)
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
return perf_guest_cbs->get_guest_ip();
if (perf_guest_state())
return perf_guest_get_ip();
return regs->ip + code_segment_base(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
unsigned int guest_state = perf_guest_state();
int misc = 0;
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
if (perf_guest_cbs->is_user_mode())
if (guest_state) {
if (guest_state & PERF_GUEST_USER)
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;

View File

@ -2901,10 +2901,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) {
handled++;
if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
perf_guest_cbs->handle_intel_pt_intr))
perf_guest_cbs->handle_intel_pt_intr();
else
if (!perf_guest_handle_intel_pt_intr())
intel_pt_interrupt();
}

View File

@ -774,6 +774,7 @@ struct kvm_vcpu_arch {
unsigned nmi_pending; /* NMI queued after currently running handler */
bool nmi_injected; /* Trying to inject an NMI this entry */
bool smi_pending; /* SMI queued after currently running handler */
u8 handling_intr_from_guest;
struct kvm_mtrr mtrr_state;
u64 pat;
@ -1519,6 +1520,7 @@ struct kvm_x86_init_ops {
int (*disabled_by_bios)(void);
int (*check_processor_compatibility)(void);
int (*hardware_setup)(void);
unsigned int (*handle_intel_pt_intr)(void);
struct kvm_x86_ops *runtime_ops;
};
@ -1568,6 +1570,9 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
return -ENOTSUPP;
}
#define kvm_arch_pmi_in_guest(vcpu) \
((vcpu) && (vcpu)->arch.handling_intr_from_guest)
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
@ -1897,8 +1902,6 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
int kvm_is_in_guest(void);
void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
u32 size);
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);

View File

@ -36,6 +36,7 @@ config KVM
select KVM_MMIO
select SCHED_INFO
select PERF_EVENTS
select GUEST_PERF_EVENTS
select HAVE_KVM_MSI
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_NO_POLL

View File

@ -87,7 +87,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event,
* woken up. So we should wake it, but this is impossible from
* NMI context. Do it from irq work instead.
*/
if (!kvm_is_in_guest())
if (!kvm_handling_nmi_from_guest(pmc->vcpu))
irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
else
kvm_make_request(KVM_REQ_PMI, pmc->vcpu);

View File

@ -3933,7 +3933,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
}
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(vcpu);
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
kvm_load_host_xsave_state(vcpu);
stgi();

View File

@ -6344,7 +6344,9 @@ void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
unsigned long entry)
{
kvm_before_interrupt(vcpu);
bool is_nmi = entry == (unsigned long)asm_exc_nmi_noist;
kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
vmx_do_interrupt_nmi_irqoff(entry);
kvm_after_interrupt(vcpu);
}
@ -7693,6 +7695,20 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
};
static unsigned int vmx_handle_intel_pt_intr(void)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
/* '0' on failure so that the !PT case can use a RET0 static call. */
if (!kvm_arch_pmi_in_guest(vcpu))
return 0;
kvm_make_request(KVM_REQ_PMI, vcpu);
__set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
(unsigned long *)&vcpu->arch.pmu.global_status);
return 1;
}
static __init void vmx_setup_user_return_msrs(void)
{
@ -7719,6 +7735,8 @@ static __init void vmx_setup_user_return_msrs(void)
kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
}
static struct kvm_x86_init_ops vmx_init_ops __initdata;
static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
@ -7877,6 +7895,10 @@ static __init int hardware_setup(void)
return -EINVAL;
if (!enable_ept || !cpu_has_vmx_intel_pt())
pt_mode = PT_MODE_SYSTEM;
if (pt_mode == PT_MODE_HOST_GUEST)
vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr;
else
vmx_init_ops.handle_intel_pt_intr = NULL;
setup_default_sgx_lepubkeyhash();
@ -7905,6 +7927,7 @@ static struct kvm_x86_init_ops vmx_init_ops __initdata = {
.disabled_by_bios = vmx_disabled_by_bios,
.check_processor_compatibility = vmx_check_processor_compat,
.hardware_setup = hardware_setup,
.handle_intel_pt_intr = NULL,
.runtime_ops = &vmx_x86_ops,
};

View File

@ -8519,50 +8519,6 @@ static void kvm_timer_init(void)
kvmclock_cpu_online, kvmclock_cpu_down_prep);
}
DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
int kvm_is_in_guest(void)
{
return __this_cpu_read(current_vcpu) != NULL;
}
static int kvm_is_user_mode(void)
{
int user_mode = 3;
if (__this_cpu_read(current_vcpu))
user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu));
return user_mode != 0;
}
static unsigned long kvm_get_guest_ip(void)
{
unsigned long ip = 0;
if (__this_cpu_read(current_vcpu))
ip = kvm_rip_read(__this_cpu_read(current_vcpu));
return ip;
}
static void kvm_handle_intel_pt_intr(void)
{
struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
kvm_make_request(KVM_REQ_PMI, vcpu);
__set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
(unsigned long *)&vcpu->arch.pmu.global_status);
}
static struct perf_guest_info_callbacks kvm_guest_cbs = {
.is_in_guest = kvm_is_in_guest,
.is_user_mode = kvm_is_user_mode,
.get_guest_ip = kvm_get_guest_ip,
.handle_intel_pt_intr = kvm_handle_intel_pt_intr,
};
#ifdef CONFIG_X86_64
static void pvclock_gtod_update_fn(struct work_struct *work)
{
@ -8676,8 +8632,6 @@ int kvm_arch_init(void *opaque)
kvm_timer_init();
perf_register_guest_info_callbacks(&kvm_guest_cbs);
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
@ -8709,7 +8663,6 @@ void kvm_arch_exit(void)
clear_hv_tscchange_cb();
#endif
kvm_lapic_exit();
perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
@ -9936,7 +9889,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* interrupts on processors that implement an interrupt shadow, the
* stat.exits increment will do nicely.
*/
kvm_before_interrupt(vcpu);
kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
local_irq_enable();
++vcpu->stat.exits;
local_irq_disable();
@ -11269,6 +11222,8 @@ int kvm_arch_hardware_setup(void *opaque)
memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
kvm_ops_static_call_update();
kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
supported_xss = 0;
@ -11296,6 +11251,8 @@ int kvm_arch_hardware_setup(void *opaque)
void kvm_arch_hardware_unsetup(void)
{
kvm_unregister_perf_callbacks();
static_call(kvm_x86_hardware_unsetup)();
}
@ -11885,6 +11842,11 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
return vcpu->arch.preempted_in_kernel;
}
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
return kvm_rip_read(vcpu);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;

View File

@ -392,18 +392,27 @@ static inline bool kvm_cstate_in_guest(struct kvm *kvm)
return kvm->arch.cstate_in_guest;
}
DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
enum kvm_intr_type {
/* Values are arbitrary, but must be non-zero. */
KVM_HANDLING_IRQ = 1,
KVM_HANDLING_NMI,
};
static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
enum kvm_intr_type intr)
{
__this_cpu_write(current_vcpu, vcpu);
WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
}
static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
{
__this_cpu_write(current_vcpu, NULL);
WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
}
static inline bool kvm_handling_nmi_from_guest(struct kvm_vcpu *vcpu)
{
return vcpu->arch.handling_intr_from_guest == KVM_HANDLING_NMI;
}
static inline bool kvm_pat_valid(u64 data)
{

View File

@ -23,6 +23,7 @@ config XEN_PV
select PARAVIRT_XXL
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
select GUEST_PERF_EVENTS
help
Support running as a Xen PV guest.

View File

@ -413,34 +413,29 @@ int pmu_apic_update(uint32_t val)
}
/* perf callbacks */
static int xen_is_in_guest(void)
static unsigned int xen_guest_state(void)
{
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
unsigned int state = 0;
if (!xenpmu_data) {
pr_warn_once("%s: pmudata not initialized\n", __func__);
return 0;
return state;
}
if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
return 0;
return state;
return 1;
}
state |= PERF_GUEST_ACTIVE;
static int xen_is_user_mode(void)
{
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
if (!xenpmu_data) {
pr_warn_once("%s: pmudata not initialized\n", __func__);
return 0;
if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) {
if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER)
state |= PERF_GUEST_USER;
} else if (xenpmu_data->pmu.r.regs.cpl & 3) {
state |= PERF_GUEST_USER;
}
if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
else
return !!(xenpmu_data->pmu.r.regs.cpl & 3);
return state;
}
static unsigned long xen_get_guest_ip(void)
@ -456,9 +451,8 @@ static unsigned long xen_get_guest_ip(void)
}
static struct perf_guest_info_callbacks xen_guest_cbs = {
.is_in_guest = xen_is_in_guest,
.is_user_mode = xen_is_user_mode,
.get_guest_ip = xen_get_guest_ip,
.state = xen_guest_state,
.get_ip = xen_get_guest_ip,
};
/* Convert registers from Xen's format to Linux' */

View File

@ -13,13 +13,6 @@
#define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1)
#define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
static __always_inline bool kvm_arm_support_pmu_v3(void)
{
return static_branch_likely(&kvm_arm_pmu_available);
}
#ifdef CONFIG_HW_PERF_EVENTS
struct kvm_pmc {
@ -36,6 +29,13 @@ struct kvm_pmu {
struct irq_work overflow_work;
};
DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
static __always_inline bool kvm_arm_support_pmu_v3(void)
{
return static_branch_likely(&kvm_arm_pmu_available);
}
#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS)
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
@ -65,6 +65,11 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
struct kvm_pmu {
};
static inline bool kvm_arm_support_pmu_v3(void)
{
return false;
}
#define kvm_arm_pmu_irq_initialized(v) (false)
static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
u64 select_idx)

View File

@ -1166,6 +1166,16 @@ static inline bool kvm_arch_intc_initialized(struct kvm *kvm)
}
#endif
#ifdef CONFIG_GUEST_PERF_EVENTS
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu);
void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void));
void kvm_unregister_perf_callbacks(void);
#else
static inline void kvm_register_perf_callbacks(void *ign) {}
static inline void kvm_unregister_perf_callbacks(void) {}
#endif /* CONFIG_GUEST_PERF_EVENTS */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
void kvm_arch_destroy_vm(struct kvm *kvm);
void kvm_arch_sync_events(struct kvm *kvm);

View File

@ -26,11 +26,13 @@
# include <asm/local64.h>
#endif
#define PERF_GUEST_ACTIVE 0x01
#define PERF_GUEST_USER 0x02
struct perf_guest_info_callbacks {
int (*is_in_guest)(void);
int (*is_user_mode)(void);
unsigned long (*get_guest_ip)(void);
void (*handle_intel_pt_intr)(void);
unsigned int (*state)(void);
unsigned long (*get_ip)(void);
unsigned int (*handle_intel_pt_intr)(void);
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
@ -1251,9 +1253,32 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
enum perf_bpf_event_type type,
u16 flags);
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
#ifdef CONFIG_GUEST_PERF_EVENTS
extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
static inline unsigned int perf_guest_state(void)
{
return static_call(__perf_guest_state)();
}
static inline unsigned long perf_guest_get_ip(void)
{
return static_call(__perf_guest_get_ip)();
}
static inline unsigned int perf_guest_handle_intel_pt_intr(void)
{
return static_call(__perf_guest_handle_intel_pt_intr)();
}
extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
#else
static inline unsigned int perf_guest_state(void) { return 0; }
static inline unsigned long perf_guest_get_ip(void) { return 0; }
static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; }
#endif /* CONFIG_GUEST_PERF_EVENTS */
extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
@ -1497,11 +1522,6 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { }
static inline void
perf_bp_event(struct perf_event *event, void *data) { }
static inline int perf_register_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks) { return 0; }
static inline int perf_unregister_guest_info_callbacks
(struct perf_guest_info_callbacks *callbacks) { return 0; }
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);

View File

@ -1797,6 +1797,10 @@ config HAVE_PERF_EVENTS
help
See tools/perf/design.txt for details.
config GUEST_PERF_EVENTS
bool
depends on HAVE_PERF_EVENTS
config PERF_USE_VMALLOC
bool
help

View File

@ -6525,26 +6525,43 @@ static void perf_pending_event(struct irq_work *entry)
perf_swevent_put_recursion_context(rctx);
}
/*
* We assume there is only KVM supporting the callbacks.
* Later on, we might change it to a list if there is
* another virtualization implementation supporting the callbacks.
*/
struct perf_guest_info_callbacks *perf_guest_cbs;
#ifdef CONFIG_GUEST_PERF_EVENTS
struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state);
DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
{
perf_guest_cbs = cbs;
return 0;
if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs)))
return;
rcu_assign_pointer(perf_guest_cbs, cbs);
static_call_update(__perf_guest_state, cbs->state);
static_call_update(__perf_guest_get_ip, cbs->get_ip);
/* Implementing ->handle_intel_pt_intr is optional. */
if (cbs->handle_intel_pt_intr)
static_call_update(__perf_guest_handle_intel_pt_intr,
cbs->handle_intel_pt_intr);
}
EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
{
perf_guest_cbs = NULL;
return 0;
if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs) != cbs))
return;
rcu_assign_pointer(perf_guest_cbs, NULL);
static_call_update(__perf_guest_state, (void *)&__static_call_return0);
static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0);
static_call_update(__perf_guest_handle_intel_pt_intr,
(void *)&__static_call_return0);
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
#endif
static void
perf_output_sample_regs(struct perf_output_handle *handle,

View File

@ -5419,6 +5419,50 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
return &kvm_running_vcpu;
}
#ifdef CONFIG_GUEST_PERF_EVENTS
static unsigned int kvm_guest_state(void)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
unsigned int state;
if (!kvm_arch_pmi_in_guest(vcpu))
return 0;
state = PERF_GUEST_ACTIVE;
if (!kvm_arch_vcpu_in_kernel(vcpu))
state |= PERF_GUEST_USER;
return state;
}
static unsigned long kvm_guest_get_ip(void)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
/* Retrieving the IP must be guarded by a call to kvm_guest_state(). */
if (WARN_ON_ONCE(!kvm_arch_pmi_in_guest(vcpu)))
return 0;
return kvm_arch_vcpu_get_ip(vcpu);
}
static struct perf_guest_info_callbacks kvm_guest_cbs = {
.state = kvm_guest_state,
.get_ip = kvm_guest_get_ip,
.handle_intel_pt_intr = NULL,
};
void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void))
{
kvm_guest_cbs.handle_intel_pt_intr = pt_intr_handler;
perf_register_guest_info_callbacks(&kvm_guest_cbs);
}
void kvm_unregister_perf_callbacks(void)
{
perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
}
#endif
struct kvm_cpu_compat_check {
void *opaque;
int *ret;