Merge branch 'for-next/perf' into aarch64/for-next/core

Merge in arm64 perf and PMU driver updates, including support for the
system/uncore PMU in the ThunderX2 platform.

commit b47f515bdc
@@ -0,0 +1,41 @@
Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE)
=============================================================

The ThunderX2 SoC PMU consists of independent, system-wide, per-socket
PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC).

The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles.
Events are counted for the default channel (i.e. channel 0) and prorated
to the total number of channels/tiles.

The DMC and L3C support up to 4 counters. Counters are independently
programmable and can be started and stopped individually. Each counter
can be set to a different event. Counters are 32-bit and do not support
an overflow interrupt; they are read every 2 seconds.

PMU UNCORE (perf) driver:

The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
L3C devices. Each PMU can be used to count up to 4 events
simultaneously. The PMUs provide a description of their available events
and configuration options under sysfs, see
/sys/devices/uncore_<l3c_S/dmc_S/>; S is the socket id.

The driver does not support sampling, therefore "perf record" will not
work. Per-task perf sessions are also not supported.

Examples:

# perf stat -a -e uncore_dmc_0/cnt_cycles/ sleep 1

# perf stat -a -e \
uncore_dmc_0/cnt_cycles/,\
uncore_dmc_0/data_transfers/,\
uncore_dmc_0/read_txns/,\
uncore_dmc_0/write_txns/ sleep 1

# perf stat -a -e \
uncore_l3c_0/read_request/,\
uncore_l3c_0/read_hit/,\
uncore_l3c_0/inv_request/,\
uncore_l3c_0/inv_hit/ sleep 1
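
Note (illustrative, not part of the diff): the proration described above boils down to reading the 32-bit channel-0 counter and scaling the delta by the channel/tile count. A minimal user-space sketch of that arithmetic, assuming a factor of 8 for the DMC and 16 for the L3C:

    #include <stdint.h>

    /* Prorate a channel-0 delta to the whole device. Unsigned 32-bit
     * subtraction absorbs a single counter wrap between two reads.
     */
    static uint64_t prorated_count(uint32_t prev, uint32_t now, uint32_t factor)
    {
        uint32_t delta = now - prev;

        return (uint64_t)delta * factor; /* factor: 8 (DMC) or 16 (L3C) */
    }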
@@ -23,6 +23,160 @@
#define ARMV8_PMU_MAX_COUNTERS 32
#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)

/*
 * Common architectural and microarchitectural event numbers.
 */
#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01
#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05
#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06
#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07
#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09
#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A
#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B
#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C
#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D
#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E
#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13
#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18
#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19
#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A
#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B
#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C
#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D
#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20
#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22
#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23
#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24
#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25
#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26
#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27
#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C
#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D
#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E
#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F
#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30
#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x31
#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x32
#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x33
#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x34
#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x35
#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36
#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37
#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38

/* Statistical profiling extension microarchitectural events */
#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000
#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001
#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002
#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003

/* ARMv8 recommended implementation defined event types */
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48

#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C
#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D
#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E
#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53

#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58

#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C
#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D
#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E
#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65
#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66
#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67
#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68
#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69
#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A

#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C
#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D
#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E
#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F
#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70
#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71
#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72
#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73
#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74
#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75
#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76
#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77
#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78
#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79
#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A

#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C
#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D
#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E

#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81
#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82
#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83
#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84

#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86
#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87
#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88

#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A
#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B
#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C
#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D
#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E
#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F
#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90
#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91

#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0
#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1
#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2
#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3

#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6
#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7
#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8

/*
 * Per-CPU PMCR: config reg
 */
@@ -49,16 +203,6 @@
#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */

/*
 * PMUv3 event types: required events
 */
#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12

/*
 * Event filters for PMUv3
 */
@@ -1,5 +1,5 @@
/*
 * PMU support
 * ARMv8 PMUv3 Performance Events handling code.
 *
 * Copyright (C) 2012 ARM Limited
 * Author: Will Deacon <will.deacon@arm.com>
@@ -30,149 +30,6 @@
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>

/*
 * ARMv8 PMUv3 Performance Events handling code.
 * Common event types (some are defined in asm/perf_event.h).
 */

/* At least one of the following is required. */
#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B

/* Common architectural events. */
#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06
#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07
#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09
#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A
#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B
#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C
#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D
#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E
#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F
#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C
#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E
#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21

/* Common microarchitectural events. */
#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01
#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02
#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05
#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13
#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18
#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19
#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A
#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22
#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23
#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24
#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25
#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26
#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27
#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C
#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D
#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E
#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F
#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30

/* ARMv8 recommended implementation defined event types */
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47
#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48

#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C
#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D
#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E
#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53

#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57
#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58

#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C
#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D
#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E
#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F

#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64
#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65

#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66
#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67
#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68
#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69
#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A

#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C
#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D
#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E
#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F
#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70
#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71
#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72
#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73
#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74
#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75
#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76
#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77
#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78
#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79
#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A

#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C
#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D
#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E

#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81
#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82
#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83
#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84

#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86
#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87
#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88

#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A
#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B
#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C
#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D
#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E
#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F
#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90
#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91

#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0
#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1
#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2
#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3

#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6
#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7
#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8

/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
@@ -183,12 +40,10 @@
#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC
#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED

/* PMUv3 HW events mapping. */

/*
 * ARMv8 Architectural defined events, not all of these may
 * be supported on any given implementation. Undefined events will
 * be disabled at run-time.
 * be supported on any given implementation. Unsupported events will
 * be disabled at run-time based on the PMCEID registers.
 */
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
	PERF_MAP_ALL_UNSUPPORTED,
@@ -210,8 +65,6 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]

	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
	[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,

	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
	[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
@@ -224,8 +77,6 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]

	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
	[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
};

static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -370,6 +221,18 @@ ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL);
ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL);
ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB);
ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB);
ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS);
ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE);
ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS);
ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK);
ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK);
ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD);
ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD);
ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD);
ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP);
ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED);
ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE);
ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION);

static struct attribute *armv8_pmuv3_event_attrs[] = {
	&armv8_event_attr_sw_incr.attr.attr,
@@ -420,6 +283,18 @@ static struct attribute *armv8_pmuv3_event_attrs[] = {
	&armv8_event_attr_l2i_tlb_refill.attr.attr,
	&armv8_event_attr_l2d_tlb.attr.attr,
	&armv8_event_attr_l2i_tlb.attr.attr,
	&armv8_event_attr_remote_access.attr.attr,
	&armv8_event_attr_ll_cache.attr.attr,
	&armv8_event_attr_ll_cache_miss.attr.attr,
	&armv8_event_attr_dtlb_walk.attr.attr,
	&armv8_event_attr_itlb_walk.attr.attr,
	&armv8_event_attr_ll_cache_rd.attr.attr,
	&armv8_event_attr_ll_cache_miss_rd.attr.attr,
	&armv8_event_attr_remote_access_rd.attr.attr,
	&armv8_event_attr_sample_pop.attr.attr,
	&armv8_event_attr_sample_feed.attr.attr,
	&armv8_event_attr_sample_filtrate.attr.attr,
	&armv8_event_attr_sample_collision.attr.attr,
	NULL,
};

@@ -434,7 +309,13 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj,

	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);

	if (test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap))
	if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
	    test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap))
		return attr->mode;

	pmu_attr->id -= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE;
	if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
	    test_bit(pmu_attr->id, cpu_pmu->pmceid_ext_bitmap))
		return attr->mode;

	return 0;
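
Note (hypothetical helper, not part of the diff): the visibility hunk above first checks the common range, then rebases extended IDs. For example, the sample_pop attribute carries ID 0x4000; it fails the first range check, is rebased by ARMV8_PMUV3_EXT_COMMON_EVENT_BASE to 0, and is then tested against pmceid_ext_bitmap. The same logic in isolation:

    /* Sketch of the two-range lookup; 'common' and 'ext' are the PMCEID
     * bitmaps, each covering 0x40 event IDs.
     */
    static bool pmu_event_supported(unsigned long id,
                                    const unsigned long *common,
                                    const unsigned long *ext)
    {
        if (id < 0x40 && test_bit(id, common))
            return true;

        id -= 0x4000; /* rebase 0x4000..0x403f; other IDs wrap out of range */
        return id < 0x40 && test_bit(id, ext);
    }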
@@ -1009,7 +890,7 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
	if (armv8pmu_event_is_64bit(event))
		event->hw.flags |= ARMPMU_EVT_64BIT;

	/* Onl expose micro/arch events supported by this PMU */
	/* Only expose micro/arch events supported by this PMU */
	if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS)
	    && test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
		return hw_event_id;
@@ -1061,6 +942,7 @@ static void __armv8pmu_probe_pmu(void *info)
	struct armv8pmu_probe_info *probe = info;
	struct arm_pmu *cpu_pmu = probe->pmu;
	u64 dfr0;
	u64 pmceid_raw[2];
	u32 pmceid[2];
	int pmuver;

@@ -1079,11 +961,17 @@ static void __armv8pmu_probe_pmu(void *info)
	/* Add the CPU cycles counter */
	cpu_pmu->num_events += 1;

	pmceid[0] = read_sysreg(pmceid0_el0);
	pmceid[1] = read_sysreg(pmceid1_el0);
	pmceid[0] = pmceid_raw[0] = read_sysreg(pmceid0_el0);
	pmceid[1] = pmceid_raw[1] = read_sysreg(pmceid1_el0);

	bitmap_from_arr32(cpu_pmu->pmceid_bitmap,
			  pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);

	pmceid[0] = pmceid_raw[0] >> 32;
	pmceid[1] = pmceid_raw[1] >> 32;

	bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap,
			  pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
}

static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
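
Note (background, not part of the diff): PMCEID0_EL0 and PMCEID1_EL0 are 64-bit registers; the low 32 bits of each advertise the common events 0x0000-0x003F, and the high 32 bits advertise the extended common events 0x4000-0x403F. That is why the hunk above keeps the raw 64-bit values, builds pmceid_bitmap from the low halves, then shifts right by 32 and builds pmceid_ext_bitmap from the high halves. For example, bit 32 of PMCEID0_EL0 being set means event 0x4000 (SAMPLE_POP) is implemented.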
@@ -1109,16 +997,16 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
	if (ret)
		return ret;

	cpu_pmu->handle_irq = armv8pmu_handle_irq,
	cpu_pmu->enable = armv8pmu_enable_event,
	cpu_pmu->disable = armv8pmu_disable_event,
	cpu_pmu->read_counter = armv8pmu_read_counter,
	cpu_pmu->write_counter = armv8pmu_write_counter,
	cpu_pmu->get_event_idx = armv8pmu_get_event_idx,
	cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx,
	cpu_pmu->start = armv8pmu_start,
	cpu_pmu->stop = armv8pmu_stop,
	cpu_pmu->reset = armv8pmu_reset,
	cpu_pmu->handle_irq = armv8pmu_handle_irq;
	cpu_pmu->enable = armv8pmu_enable_event;
	cpu_pmu->disable = armv8pmu_disable_event;
	cpu_pmu->read_counter = armv8pmu_read_counter;
	cpu_pmu->write_counter = armv8pmu_write_counter;
	cpu_pmu->get_event_idx = armv8pmu_get_event_idx;
	cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx;
	cpu_pmu->start = armv8pmu_start;
	cpu_pmu->stop = armv8pmu_stop;
	cpu_pmu->reset = armv8pmu_reset;
	cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
	cpu_pmu->filter_match = armv8pmu_filter_match;

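Note (not part of the diff): the lines being replaced above ended in commas rather than semicolons, so the ten assignments previously formed one long comma-operator expression that happened to compile and behave identically; the hunk simply converts them into ordinary statements.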
@@ -1274,6 +1162,7 @@ static struct platform_driver armv8_pmu_driver = {
	.driver = {
		.name = ARMV8_PMU_PDEV_NAME,
		.of_match_table = armv8_pmu_of_device_ids,
		.suppress_bind_attrs = true,
	},
	.probe = armv8_pmu_device_probe,
};

@@ -87,6 +87,15 @@ config QCOM_L3_PMU
	  Adds the L3 cache PMU into the perf events subsystem for
	  monitoring L3 cache events.

config THUNDERX2_PMU
	tristate "Cavium ThunderX2 SoC PMU UNCORE"
	depends on ARCH_THUNDER2 && ARM64 && ACPI && NUMA
	default m
	help
	  Provides support for ThunderX2 UNCORE events.
	  The SoC has PMU support in its L3 cache controller (L3C) and
	  in the DDR4 Memory Controller (DMC).

config XGENE_PMU
	depends on ARCH_XGENE
	bool "APM X-Gene SoC PMU"
@@ -7,5 +7,6 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
@@ -927,6 +927,11 @@ static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)

	idx = atomic_inc_return(&pmu_idx);
	name = devm_kasprintf(dev, GFP_KERNEL, "%s_%d", PMUNAME, idx);
	if (!name) {
		dev_err(dev, "failed to allocate name for pmu %d\n", idx);
		return -ENOMEM;
	}

	return perf_pmu_register(&spe_pmu->pmu, name, -1);
}

@@ -1169,6 +1174,7 @@ static const struct of_device_id arm_spe_pmu_of_match[] = {
	{ .compatible = "arm,statistical-profiling-extension-v1", .data = (void *)1 },
	{ /* Sentinel */ },
};
MODULE_DEVICE_TABLE(of, arm_spe_pmu_of_match);

static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
{
@@ -0,0 +1,861 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * CAVIUM THUNDERX2 SoC PMU UNCORE
 * Copyright (C) 2018 Cavium Inc.
 * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com>
 */

#include <linux/acpi.h>
#include <linux/cpuhotplug.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

/* Each ThunderX2(TX2) Socket has a L3C and DMC UNCORE PMU device.
 * Each UNCORE PMU device consists of 4 independent programmable counters.
 * Counters are 32 bit and do not support overflow interrupt,
 * they need to be sampled before overflow(i.e, at every 2 seconds).
 */

#define TX2_PMU_MAX_COUNTERS 4
#define TX2_PMU_DMC_CHANNELS 8
#define TX2_PMU_L3_TILES 16

#define TX2_PMU_HRTIMER_INTERVAL (2 * NSEC_PER_SEC)
#define GET_EVENTID(ev) ((ev->hw.config) & 0x1f)
#define GET_COUNTERID(ev) ((ev->hw.idx) & 0x3)
/* 1 byte per counter(4 counters).
 * Event id is encoded in bits [5:1] of a byte,
 */
#define DMC_EVENT_CFG(idx, val) ((val) << (((idx) * 8) + 1))

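Note (illustrative, not part of the diff): each of the four DMC counters owns one byte of the control register, with the 5-bit event ID at bits [5:1] of that byte. Programming event 0xB on counter 2 therefore shifts by 2 * 8 + 1 = 17. A standalone check of the macro:

    #include <stdint.h>
    #include <stdio.h>

    #define DMC_EVENT_CFG(idx, val) ((val) << (((idx) * 8) + 1))

    int main(void)
    {
        /* Event 0xB (WRITE_TXNS) on counter 2 lands at bits [21:17]. */
        uint32_t val = DMC_EVENT_CFG(2, 0xB);

        printf("0x%08x\n", val); /* prints 0x00160000 */
        return 0;
    }
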
#define L3C_COUNTER_CTL 0xA8
#define L3C_COUNTER_DATA 0xAC
#define DMC_COUNTER_CTL 0x234
#define DMC_COUNTER_DATA 0x240

/* L3C event IDs */
#define L3_EVENT_READ_REQ 0xD
#define L3_EVENT_WRITEBACK_REQ 0xE
#define L3_EVENT_INV_N_WRITE_REQ 0xF
#define L3_EVENT_INV_REQ 0x10
#define L3_EVENT_EVICT_REQ 0x13
#define L3_EVENT_INV_N_WRITE_HIT 0x14
#define L3_EVENT_INV_HIT 0x15
#define L3_EVENT_READ_HIT 0x17
#define L3_EVENT_MAX 0x18

/* DMC event IDs */
#define DMC_EVENT_COUNT_CYCLES 0x1
#define DMC_EVENT_WRITE_TXNS 0xB
#define DMC_EVENT_DATA_TRANSFERS 0xD
#define DMC_EVENT_READ_TXNS 0xF
#define DMC_EVENT_MAX 0x10

enum tx2_uncore_type {
	PMU_TYPE_L3C,
	PMU_TYPE_DMC,
	PMU_TYPE_INVALID,
};

/*
 * pmu on each socket has 2 uncore devices(dmc and l3c),
 * each device has 4 counters.
 */
struct tx2_uncore_pmu {
	struct hlist_node hpnode;
	struct list_head entry;
	struct pmu pmu;
	char *name;
	int node;
	int cpu;
	u32 max_counters;
	u32 prorate_factor;
	u32 max_events;
	u64 hrtimer_interval;
	void __iomem *base;
	DECLARE_BITMAP(active_counters, TX2_PMU_MAX_COUNTERS);
	struct perf_event *events[TX2_PMU_MAX_COUNTERS];
	struct device *dev;
	struct hrtimer hrtimer;
	const struct attribute_group **attr_groups;
	enum tx2_uncore_type type;
	void (*init_cntr_base)(struct perf_event *event,
			struct tx2_uncore_pmu *tx2_pmu);
	void (*stop_event)(struct perf_event *event);
	void (*start_event)(struct perf_event *event, int flags);
};

static LIST_HEAD(tx2_pmus);

static inline struct tx2_uncore_pmu *pmu_to_tx2_pmu(struct pmu *pmu)
{
	return container_of(pmu, struct tx2_uncore_pmu, pmu);
}

PMU_FORMAT_ATTR(event, "config:0-4");

static struct attribute *l3c_pmu_format_attrs[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute *dmc_pmu_format_attrs[] = {
	&format_attr_event.attr,
	NULL,
};

static const struct attribute_group l3c_pmu_format_attr_group = {
	.name = "format",
	.attrs = l3c_pmu_format_attrs,
};

static const struct attribute_group dmc_pmu_format_attr_group = {
	.name = "format",
	.attrs = dmc_pmu_format_attrs,
};

/*
 * sysfs event attributes
 */
static ssize_t tx2_pmu_event_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct dev_ext_attribute *eattr;

	eattr = container_of(attr, struct dev_ext_attribute, attr);
	return sprintf(buf, "event=0x%lx\n", (unsigned long) eattr->var);
}

#define TX2_EVENT_ATTR(name, config) \
	PMU_EVENT_ATTR(name, tx2_pmu_event_attr_##name, \
			config, tx2_pmu_event_show)

TX2_EVENT_ATTR(read_request, L3_EVENT_READ_REQ);
TX2_EVENT_ATTR(writeback_request, L3_EVENT_WRITEBACK_REQ);
TX2_EVENT_ATTR(inv_nwrite_request, L3_EVENT_INV_N_WRITE_REQ);
TX2_EVENT_ATTR(inv_request, L3_EVENT_INV_REQ);
TX2_EVENT_ATTR(evict_request, L3_EVENT_EVICT_REQ);
TX2_EVENT_ATTR(inv_nwrite_hit, L3_EVENT_INV_N_WRITE_HIT);
TX2_EVENT_ATTR(inv_hit, L3_EVENT_INV_HIT);
TX2_EVENT_ATTR(read_hit, L3_EVENT_READ_HIT);

static struct attribute *l3c_pmu_events_attrs[] = {
	&tx2_pmu_event_attr_read_request.attr.attr,
	&tx2_pmu_event_attr_writeback_request.attr.attr,
	&tx2_pmu_event_attr_inv_nwrite_request.attr.attr,
	&tx2_pmu_event_attr_inv_request.attr.attr,
	&tx2_pmu_event_attr_evict_request.attr.attr,
	&tx2_pmu_event_attr_inv_nwrite_hit.attr.attr,
	&tx2_pmu_event_attr_inv_hit.attr.attr,
	&tx2_pmu_event_attr_read_hit.attr.attr,
	NULL,
};

TX2_EVENT_ATTR(cnt_cycles, DMC_EVENT_COUNT_CYCLES);
TX2_EVENT_ATTR(write_txns, DMC_EVENT_WRITE_TXNS);
TX2_EVENT_ATTR(data_transfers, DMC_EVENT_DATA_TRANSFERS);
TX2_EVENT_ATTR(read_txns, DMC_EVENT_READ_TXNS);

static struct attribute *dmc_pmu_events_attrs[] = {
	&tx2_pmu_event_attr_cnt_cycles.attr.attr,
	&tx2_pmu_event_attr_write_txns.attr.attr,
	&tx2_pmu_event_attr_data_transfers.attr.attr,
	&tx2_pmu_event_attr_read_txns.attr.attr,
	NULL,
};

static const struct attribute_group l3c_pmu_events_attr_group = {
	.name = "events",
	.attrs = l3c_pmu_events_attrs,
};

static const struct attribute_group dmc_pmu_events_attr_group = {
	.name = "events",
	.attrs = dmc_pmu_events_attrs,
};

/*
 * sysfs cpumask attributes
 */
static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	struct tx2_uncore_pmu *tx2_pmu;

	tx2_pmu = pmu_to_tx2_pmu(dev_get_drvdata(dev));
	return cpumap_print_to_pagebuf(true, buf, cpumask_of(tx2_pmu->cpu));
}
static DEVICE_ATTR_RO(cpumask);

static struct attribute *tx2_pmu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group pmu_cpumask_attr_group = {
	.attrs = tx2_pmu_cpumask_attrs,
};

/*
 * Per PMU device attribute groups
 */
static const struct attribute_group *l3c_pmu_attr_groups[] = {
	&l3c_pmu_format_attr_group,
	&pmu_cpumask_attr_group,
	&l3c_pmu_events_attr_group,
	NULL
};

static const struct attribute_group *dmc_pmu_attr_groups[] = {
	&dmc_pmu_format_attr_group,
	&pmu_cpumask_attr_group,
	&dmc_pmu_events_attr_group,
	NULL
};

static inline u32 reg_readl(unsigned long addr)
{
	return readl((void __iomem *)addr);
}

static inline void reg_writel(u32 val, unsigned long addr)
{
	writel(val, (void __iomem *)addr);
}

static int alloc_counter(struct tx2_uncore_pmu *tx2_pmu)
{
	int counter;

	counter = find_first_zero_bit(tx2_pmu->active_counters,
				tx2_pmu->max_counters);
	if (counter == tx2_pmu->max_counters)
		return -ENOSPC;

	set_bit(counter, tx2_pmu->active_counters);
	return counter;
}

static inline void free_counter(struct tx2_uncore_pmu *tx2_pmu, int counter)
{
	clear_bit(counter, tx2_pmu->active_counters);
}

static void init_cntr_base_l3c(struct perf_event *event,
		struct tx2_uncore_pmu *tx2_pmu)
{
	struct hw_perf_event *hwc = &event->hw;

	/* counter ctrl/data reg offset at 8 */
	hwc->config_base = (unsigned long)tx2_pmu->base
		+ L3C_COUNTER_CTL + (8 * GET_COUNTERID(event));
	hwc->event_base = (unsigned long)tx2_pmu->base
		+ L3C_COUNTER_DATA + (8 * GET_COUNTERID(event));
}

static void init_cntr_base_dmc(struct perf_event *event,
		struct tx2_uncore_pmu *tx2_pmu)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->config_base = (unsigned long)tx2_pmu->base
		+ DMC_COUNTER_CTL;
	/* counter data reg offset at 0xc */
	hwc->event_base = (unsigned long)tx2_pmu->base
		+ DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event));
}

static void uncore_start_event_l3c(struct perf_event *event, int flags)
{
	u32 val;
	struct hw_perf_event *hwc = &event->hw;

	/* event id encoded in bits [07:03] */
	val = GET_EVENTID(event) << 3;
	reg_writel(val, hwc->config_base);
	local64_set(&hwc->prev_count, 0);
	reg_writel(0, hwc->event_base);
}

static inline void uncore_stop_event_l3c(struct perf_event *event)
{
	reg_writel(0, event->hw.config_base);
}

static void uncore_start_event_dmc(struct perf_event *event, int flags)
{
	u32 val;
	struct hw_perf_event *hwc = &event->hw;
	int idx = GET_COUNTERID(event);
	int event_id = GET_EVENTID(event);

	/* enable and start counters.
	 * 8 bits for each counter, bits[05:01] of a counter to set event type.
	 */
	val = reg_readl(hwc->config_base);
	val &= ~DMC_EVENT_CFG(idx, 0x1f);
	val |= DMC_EVENT_CFG(idx, event_id);
	reg_writel(val, hwc->config_base);
	local64_set(&hwc->prev_count, 0);
	reg_writel(0, hwc->event_base);
}

static void uncore_stop_event_dmc(struct perf_event *event)
{
	u32 val;
	struct hw_perf_event *hwc = &event->hw;
	int idx = GET_COUNTERID(event);

	/* clear event type(bits[05:01]) to stop counter */
	val = reg_readl(hwc->config_base);
	val &= ~DMC_EVENT_CFG(idx, 0x1f);
	reg_writel(val, hwc->config_base);
}

static void tx2_uncore_event_update(struct perf_event *event)
{
	s64 prev, delta, new = 0;
	struct hw_perf_event *hwc = &event->hw;
	struct tx2_uncore_pmu *tx2_pmu;
	enum tx2_uncore_type type;
	u32 prorate_factor;

	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
	type = tx2_pmu->type;
	prorate_factor = tx2_pmu->prorate_factor;

	new = reg_readl(hwc->event_base);
	prev = local64_xchg(&hwc->prev_count, new);

	/* handles rollover of 32 bit counter */
	delta = (u32)(((1UL << 32) - prev) + new);

	/* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */
	if (type == PMU_TYPE_DMC &&
		GET_EVENTID(event) == DMC_EVENT_DATA_TRANSFERS)
		delta = delta/4;

	/* L3C and DMC has 16 and 8 interleave channels respectively.
	 * The sampled value is for channel 0 and multiplied with
	 * prorate_factor to get the count for a device.
	 */
	local64_add(delta * prorate_factor, &event->count);
}
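
Note (worked example, not part of the diff): in the update above, with prev = 0xFFFFFF00 and new = 0x80, delta = (2^32 - prev) + new = 0x180; truncating to u32 also keeps the non-wrapped case correct, since ((2^32 - prev) + new) mod 2^32 equals new - prev. A standalone check:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t prev = 0xFFFFFF00u, now = 0x80u;

        /* Same arithmetic as the driver: one wrap is absorbed by u32 math. */
        uint32_t delta = (uint32_t)(((1ULL << 32) - prev) + now);

        assert(delta == 0x180);      /* (2^32 - prev) + now             */
        assert(delta == now - prev); /* equivalent unsigned subtraction */
        return 0;
    }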

static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev)
{
	int i = 0;
	struct acpi_tx2_pmu_device {
		__u8 id[ACPI_ID_LEN];
		enum tx2_uncore_type type;
	} devices[] = {
		{"CAV901D", PMU_TYPE_L3C},
		{"CAV901F", PMU_TYPE_DMC},
		{"", PMU_TYPE_INVALID}
	};

	while (devices[i].type != PMU_TYPE_INVALID) {
		if (!strcmp(acpi_device_hid(adev), devices[i].id))
			break;
		i++;
	}

	return devices[i].type;
}

static bool tx2_uncore_validate_event(struct pmu *pmu,
				struct perf_event *event, int *counters)
{
	if (is_software_event(event))
		return true;
	/* Reject groups spanning multiple HW PMUs. */
	if (event->pmu != pmu)
		return false;

	*counters = *counters + 1;
	return true;
}

/*
 * Make sure the group of events can be scheduled at once
 * on the PMU.
 */
static bool tx2_uncore_validate_event_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	int counters = 0;

	if (event->group_leader == event)
		return true;

	if (!tx2_uncore_validate_event(event->pmu, leader, &counters))
		return false;

	for_each_sibling_event(sibling, leader) {
		if (!tx2_uncore_validate_event(event->pmu, sibling, &counters))
			return false;
	}

	if (!tx2_uncore_validate_event(event->pmu, event, &counters))
		return false;

	/*
	 * If the group requires more counters than the HW has,
	 * it cannot ever be scheduled.
	 */
	return counters <= TX2_PMU_MAX_COUNTERS;
}


static int tx2_uncore_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct tx2_uncore_pmu *tx2_pmu;

	/* Test the event attr type check for PMU enumeration */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/*
	 * SOC PMU counters are shared across all cores.
	 * Therefore, it does not support per-process mode.
	 * Also, it does not support event sampling mode.
	 */
	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
		return -EINVAL;

	/* We have no filtering of any kind */
	if (event->attr.exclude_user ||
	    event->attr.exclude_kernel ||
	    event->attr.exclude_hv ||
	    event->attr.exclude_idle ||
	    event->attr.exclude_host ||
	    event->attr.exclude_guest)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
	if (tx2_pmu->cpu >= nr_cpu_ids)
		return -EINVAL;
	event->cpu = tx2_pmu->cpu;

	if (event->attr.config >= tx2_pmu->max_events)
		return -EINVAL;

	/* store event id */
	hwc->config = event->attr.config;

	/* Validate the group */
	if (!tx2_uncore_validate_event_group(event))
		return -EINVAL;

	return 0;
}

static void tx2_uncore_event_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct tx2_uncore_pmu *tx2_pmu;

	hwc->state = 0;
	tx2_pmu = pmu_to_tx2_pmu(event->pmu);

	tx2_pmu->start_event(event, flags);
	perf_event_update_userpage(event);

	/* Start timer for first event */
	if (bitmap_weight(tx2_pmu->active_counters,
				tx2_pmu->max_counters) == 1) {
		hrtimer_start(&tx2_pmu->hrtimer,
			ns_to_ktime(tx2_pmu->hrtimer_interval),
			HRTIMER_MODE_REL_PINNED);
	}
}

static void tx2_uncore_event_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct tx2_uncore_pmu *tx2_pmu;

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	tx2_pmu = pmu_to_tx2_pmu(event->pmu);
	tx2_pmu->stop_event(event);
	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
	hwc->state |= PERF_HES_STOPPED;
	if (flags & PERF_EF_UPDATE) {
		tx2_uncore_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int tx2_uncore_event_add(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct tx2_uncore_pmu *tx2_pmu;

	tx2_pmu = pmu_to_tx2_pmu(event->pmu);

	/* Allocate a free counter */
	hwc->idx = alloc_counter(tx2_pmu);
	if (hwc->idx < 0)
		return -EAGAIN;

	tx2_pmu->events[hwc->idx] = event;
	/* set counter control and data registers base address */
	tx2_pmu->init_cntr_base(event, tx2_pmu);

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (flags & PERF_EF_START)
		tx2_uncore_event_start(event, flags);

	return 0;
}

static void tx2_uncore_event_del(struct perf_event *event, int flags)
{
	struct tx2_uncore_pmu *tx2_pmu = pmu_to_tx2_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	tx2_uncore_event_stop(event, PERF_EF_UPDATE);

	/* clear the assigned counter */
	free_counter(tx2_pmu, GET_COUNTERID(event));

	perf_event_update_userpage(event);
	tx2_pmu->events[hwc->idx] = NULL;
	hwc->idx = -1;
}

static void tx2_uncore_event_read(struct perf_event *event)
{
	tx2_uncore_event_update(event);
}

static enum hrtimer_restart tx2_hrtimer_callback(struct hrtimer *timer)
{
	struct tx2_uncore_pmu *tx2_pmu;
	int max_counters, idx;

	tx2_pmu = container_of(timer, struct tx2_uncore_pmu, hrtimer);
	max_counters = tx2_pmu->max_counters;

	if (bitmap_empty(tx2_pmu->active_counters, max_counters))
		return HRTIMER_NORESTART;

	for_each_set_bit(idx, tx2_pmu->active_counters, max_counters) {
		struct perf_event *event = tx2_pmu->events[idx];

		tx2_uncore_event_update(event);
	}
	hrtimer_forward_now(timer, ns_to_ktime(tx2_pmu->hrtimer_interval));
	return HRTIMER_RESTART;
}
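
Note (not part of the diff): the 2-second period bounds how fast an event may tick. A 32-bit counter can absorb at most 2^32 - 1 increments between samples, so the design implicitly assumes per-channel event rates stay below roughly 2^32 / 2 s, about 2.1 x 10^9 events per second; a counter that wrapped twice between callbacks would silently lose a full wrap.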

static int tx2_uncore_pmu_register(
		struct tx2_uncore_pmu *tx2_pmu)
{
	struct device *dev = tx2_pmu->dev;
	char *name = tx2_pmu->name;

	/* Perf event registration */
	tx2_pmu->pmu = (struct pmu) {
		.module = THIS_MODULE,
		.attr_groups = tx2_pmu->attr_groups,
		.task_ctx_nr = perf_invalid_context,
		.event_init = tx2_uncore_event_init,
		.add = tx2_uncore_event_add,
		.del = tx2_uncore_event_del,
		.start = tx2_uncore_event_start,
		.stop = tx2_uncore_event_stop,
		.read = tx2_uncore_event_read,
	};

	tx2_pmu->pmu.name = devm_kasprintf(dev, GFP_KERNEL,
			"%s", name);

	return perf_pmu_register(&tx2_pmu->pmu, tx2_pmu->pmu.name, -1);
}

static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu)
{
	int ret, cpu;

	cpu = cpumask_any_and(cpumask_of_node(tx2_pmu->node),
			cpu_online_mask);

	tx2_pmu->cpu = cpu;
	hrtimer_init(&tx2_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	tx2_pmu->hrtimer.function = tx2_hrtimer_callback;

	ret = tx2_uncore_pmu_register(tx2_pmu);
	if (ret) {
		dev_err(tx2_pmu->dev, "%s PMU: Failed to init driver\n",
				tx2_pmu->name);
		return -ENODEV;
	}

	/* register hotplug callback for the pmu */
	ret = cpuhp_state_add_instance(
			CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
			&tx2_pmu->hpnode);
	if (ret) {
		dev_err(tx2_pmu->dev, "Error %d registering hotplug", ret);
		return ret;
	}

	/* Add to list */
	list_add(&tx2_pmu->entry, &tx2_pmus);

	dev_dbg(tx2_pmu->dev, "%s PMU UNCORE registered\n",
			tx2_pmu->pmu.name);
	return ret;
}

static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
		acpi_handle handle, struct acpi_device *adev, u32 type)
{
	struct tx2_uncore_pmu *tx2_pmu;
	void __iomem *base;
	struct resource res;
	struct resource_entry *rentry;
	struct list_head list;
	int ret;

	INIT_LIST_HEAD(&list);
	ret = acpi_dev_get_resources(adev, &list, NULL, NULL);
	if (ret <= 0) {
		dev_err(dev, "failed to parse _CRS method, error %d\n", ret);
		return NULL;
	}

	list_for_each_entry(rentry, &list, node) {
		if (resource_type(rentry->res) == IORESOURCE_MEM) {
			res = *rentry->res;
			break;
		}
	}

	if (!rentry->res)
		return NULL;

	acpi_dev_free_resource_list(&list);
	base = devm_ioremap_resource(dev, &res);
	if (IS_ERR(base)) {
		dev_err(dev, "PMU type %d: Fail to map resource\n", type);
		return NULL;
	}

	tx2_pmu = devm_kzalloc(dev, sizeof(*tx2_pmu), GFP_KERNEL);
	if (!tx2_pmu)
		return NULL;

	tx2_pmu->dev = dev;
	tx2_pmu->type = type;
	tx2_pmu->base = base;
	tx2_pmu->node = dev_to_node(dev);
	INIT_LIST_HEAD(&tx2_pmu->entry);

	switch (tx2_pmu->type) {
	case PMU_TYPE_L3C:
		tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS;
		tx2_pmu->prorate_factor = TX2_PMU_L3_TILES;
		tx2_pmu->max_events = L3_EVENT_MAX;
		tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL;
		tx2_pmu->attr_groups = l3c_pmu_attr_groups;
		tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
				"uncore_l3c_%d", tx2_pmu->node);
		tx2_pmu->init_cntr_base = init_cntr_base_l3c;
		tx2_pmu->start_event = uncore_start_event_l3c;
		tx2_pmu->stop_event = uncore_stop_event_l3c;
		break;
	case PMU_TYPE_DMC:
		tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS;
		tx2_pmu->prorate_factor = TX2_PMU_DMC_CHANNELS;
		tx2_pmu->max_events = DMC_EVENT_MAX;
		tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL;
		tx2_pmu->attr_groups = dmc_pmu_attr_groups;
		tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
				"uncore_dmc_%d", tx2_pmu->node);
		tx2_pmu->init_cntr_base = init_cntr_base_dmc;
		tx2_pmu->start_event = uncore_start_event_dmc;
		tx2_pmu->stop_event = uncore_stop_event_dmc;
		break;
	case PMU_TYPE_INVALID:
		devm_kfree(dev, tx2_pmu);
		return NULL;
	}

	return tx2_pmu;
}

static acpi_status tx2_uncore_pmu_add(acpi_handle handle, u32 level,
		void *data, void **return_value)
{
	struct tx2_uncore_pmu *tx2_pmu;
	struct acpi_device *adev;
	enum tx2_uncore_type type;

	if (acpi_bus_get_device(handle, &adev))
		return AE_OK;
	if (acpi_bus_get_status(adev) || !adev->status.present)
		return AE_OK;

	type = get_tx2_pmu_type(adev);
	if (type == PMU_TYPE_INVALID)
		return AE_OK;

	tx2_pmu = tx2_uncore_pmu_init_dev((struct device *)data,
			handle, adev, type);

	if (!tx2_pmu)
		return AE_ERROR;

	if (tx2_uncore_pmu_add_dev(tx2_pmu)) {
		/* Can't add the PMU device, abort */
		return AE_ERROR;
	}
	return AE_OK;
}

static int tx2_uncore_pmu_online_cpu(unsigned int cpu,
		struct hlist_node *hpnode)
{
	struct tx2_uncore_pmu *tx2_pmu;

	tx2_pmu = hlist_entry_safe(hpnode,
			struct tx2_uncore_pmu, hpnode);

	/* Pick this CPU, If there is no CPU/PMU association and both are
	 * from same node.
	 */
	if ((tx2_pmu->cpu >= nr_cpu_ids) &&
		(tx2_pmu->node == cpu_to_node(cpu)))
		tx2_pmu->cpu = cpu;

	return 0;
}

static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
		struct hlist_node *hpnode)
{
	int new_cpu;
	struct tx2_uncore_pmu *tx2_pmu;
	struct cpumask cpu_online_mask_temp;

	tx2_pmu = hlist_entry_safe(hpnode,
			struct tx2_uncore_pmu, hpnode);

	if (cpu != tx2_pmu->cpu)
		return 0;

	hrtimer_cancel(&tx2_pmu->hrtimer);
	cpumask_copy(&cpu_online_mask_temp, cpu_online_mask);
	cpumask_clear_cpu(cpu, &cpu_online_mask_temp);
	new_cpu = cpumask_any_and(
			cpumask_of_node(tx2_pmu->node),
			&cpu_online_mask_temp);

	tx2_pmu->cpu = new_cpu;
	if (new_cpu >= nr_cpu_ids)
		return 0;
	perf_pmu_migrate_context(&tx2_pmu->pmu, cpu, new_cpu);

	return 0;
}

static const struct acpi_device_id tx2_uncore_acpi_match[] = {
	{"CAV901C", 0},
	{},
};
MODULE_DEVICE_TABLE(acpi, tx2_uncore_acpi_match);

static int tx2_uncore_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	acpi_handle handle;
	acpi_status status;

	set_dev_node(dev, acpi_get_node(ACPI_HANDLE(dev)));

	if (!has_acpi_companion(dev))
		return -ENODEV;

	handle = ACPI_HANDLE(dev);
	if (!handle)
		return -EINVAL;

	/* Walk through the tree for all PMU UNCORE devices */
	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
			tx2_uncore_pmu_add,
			NULL, dev, NULL);
	if (ACPI_FAILURE(status)) {
		dev_err(dev, "failed to probe PMU devices\n");
		return_ACPI_STATUS(status);
	}

	dev_info(dev, "node%d: pmu uncore registered\n", dev_to_node(dev));
	return 0;
}

static int tx2_uncore_remove(struct platform_device *pdev)
{
	struct tx2_uncore_pmu *tx2_pmu, *temp;
	struct device *dev = &pdev->dev;

	if (!list_empty(&tx2_pmus)) {
		list_for_each_entry_safe(tx2_pmu, temp, &tx2_pmus, entry) {
			if (tx2_pmu->node == dev_to_node(dev)) {
				cpuhp_state_remove_instance_nocalls(
					CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
					&tx2_pmu->hpnode);
				perf_pmu_unregister(&tx2_pmu->pmu);
				list_del(&tx2_pmu->entry);
			}
		}
	}
	return 0;
}

static struct platform_driver tx2_uncore_driver = {
	.driver = {
		.name = "tx2-uncore-pmu",
		.acpi_match_table = ACPI_PTR(tx2_uncore_acpi_match),
	},
	.probe = tx2_uncore_probe,
	.remove = tx2_uncore_remove,
};

static int __init tx2_uncore_driver_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
				      "perf/tx2/uncore:online",
				      tx2_uncore_pmu_online_cpu,
				      tx2_uncore_pmu_offline_cpu);
	if (ret) {
		pr_err("TX2 PMU: setup hotplug failed(%d)\n", ret);
		return ret;
	}
	ret = platform_driver_register(&tx2_uncore_driver);
	if (ret)
		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE);

	return ret;
}
module_init(tx2_uncore_driver_init);

static void __exit tx2_uncore_driver_exit(void)
{
	platform_driver_unregister(&tx2_uncore_driver);
	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE);
}
module_exit(tx2_uncore_driver_exit);

MODULE_DESCRIPTION("ThunderX2 UNCORE PMU driver");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Ganapatrao Kulkarni <gkulkarni@cavium.com>");
@@ -21,6 +21,7 @@

#include <linux/acpi.h>
#include <linux/clk.h>
#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -130,12 +131,14 @@ struct xgene_pmu_ops {

struct xgene_pmu {
	struct device *dev;
	struct hlist_node node;
	int version;
	void __iomem *pcppmu_csr;
	u32 mcb_active_mask;
	u32 mc_active_mask;
	u32 l3c_active_mask;
	cpumask_t cpu;
	int irq;
	raw_spinlock_t lock;
	const struct xgene_pmu_ops *ops;
	struct list_head l3cpmus;
@@ -1806,6 +1809,53 @@ static const struct acpi_device_id xgene_pmu_acpi_match[] = {
MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
#endif

static int xgene_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct xgene_pmu *xgene_pmu = hlist_entry_safe(node, struct xgene_pmu,
						       node);

	if (cpumask_empty(&xgene_pmu->cpu))
		cpumask_set_cpu(cpu, &xgene_pmu->cpu);

	/* Overflow interrupt also should use the same CPU */
	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));

	return 0;
}

static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct xgene_pmu *xgene_pmu = hlist_entry_safe(node, struct xgene_pmu,
						       node);
	struct xgene_pmu_dev_ctx *ctx;
	unsigned int target;

	if (!cpumask_test_and_clear_cpu(cpu, &xgene_pmu->cpu))
		return 0;
	target = cpumask_any_but(cpu_online_mask, cpu);
	if (target >= nr_cpu_ids)
		return 0;

	list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) {
		perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target);
	}
	list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) {
		perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target);
	}
	list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) {
		perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target);
	}
	list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) {
		perf_pmu_migrate_context(&ctx->pmu_dev->pmu, cpu, target);
	}

	cpumask_set_cpu(target, &xgene_pmu->cpu);
	/* Overflow interrupt also should use the same CPU */
	WARN_ON(irq_set_affinity(xgene_pmu->irq, &xgene_pmu->cpu));

	return 0;
}

static int xgene_pmu_probe(struct platform_device *pdev)
{
	const struct xgene_pmu_data *dev_data;
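
Note (not part of the diff): both drivers in this merge follow the same uncore-PMU convention: events are counted on a single designated reader CPU, and when that CPU goes offline the offline callback picks a replacement and calls perf_pmu_migrate_context() so that running perf sessions keep working. The X-Gene callback above additionally re-pins the overflow interrupt to the new reader; the ThunderX2 driver has no interrupt to move and instead cancels its polling hrtimer on the dying CPU before migrating.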
@@ -1815,6 +1865,14 @@ static int xgene_pmu_probe(struct platform_device *pdev)
	int irq, rc;
	int version;

	/* Install a hook to update the reader CPU in case it goes offline */
	rc = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
				     "CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE",
				     xgene_pmu_online_cpu,
				     xgene_pmu_offline_cpu);
	if (rc)
		return rc;

	xgene_pmu = devm_kzalloc(&pdev->dev, sizeof(*xgene_pmu), GFP_KERNEL);
	if (!xgene_pmu)
		return -ENOMEM;
@@ -1865,6 +1923,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
		dev_err(&pdev->dev, "No IRQ resource\n");
		return -EINVAL;
	}

	rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr,
			      IRQF_NOBALANCING | IRQF_NO_THREAD,
			      dev_name(&pdev->dev), xgene_pmu);
@@ -1873,6 +1932,8 @@ static int xgene_pmu_probe(struct platform_device *pdev)
		return rc;
	}

	xgene_pmu->irq = irq;

	raw_spin_lock_init(&xgene_pmu->lock);

	/* Check for active MCBs and MCUs */
@@ -1883,13 +1944,11 @@ static int xgene_pmu_probe(struct platform_device *pdev)
		xgene_pmu->mc_active_mask = 0x1;
	}

	/* Pick one core to use for cpumask attributes */
	cpumask_set_cpu(smp_processor_id(), &xgene_pmu->cpu);

	/* Make sure that the overflow interrupt is handled by this CPU */
	rc = irq_set_affinity(irq, &xgene_pmu->cpu);
	/* Add this instance to the list used by the hotplug callback */
	rc = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
				      &xgene_pmu->node);
	if (rc) {
		dev_err(&pdev->dev, "Failed to set interrupt affinity!\n");
		dev_err(&pdev->dev, "Error %d registering hotplug", rc);
		return rc;
	}

@@ -1897,13 +1956,18 @@ static int xgene_pmu_probe(struct platform_device *pdev)
	rc = xgene_pmu_probe_pmu_dev(xgene_pmu, pdev);
	if (rc) {
		dev_err(&pdev->dev, "No PMU perf devices found!\n");
		return rc;
		goto out_unregister;
	}

	/* Enable interrupt */
	xgene_pmu->ops->unmask_int(xgene_pmu);

	return 0;

out_unregister:
	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
				    &xgene_pmu->node);
	return rc;
}

static void
@@ -1924,6 +1988,8 @@ static int xgene_pmu_remove(struct platform_device *pdev)
	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->iobpmus);
	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcbpmus);
	xgene_pmu_dev_cleanup(xgene_pmu, &xgene_pmu->mcpmus);
	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
				    &xgene_pmu->node);

	return 0;
}
@@ -164,6 +164,8 @@ enum cpuhp_state {
	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
	CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
	CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
	CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
@@ -102,8 +102,10 @@ struct arm_pmu {
	int (*filter_match)(struct perf_event *event);
	int num_events;
	bool secure_access; /* 32-bit ARM only */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000
	DECLARE_BITMAP(pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
	struct platform_device *plat_device;
	struct pmu_hw_events __percpu *hw_events;
	struct hlist_node node;