drivers/perf: hisi: Add new functions for L3C PMU

On HiSilicon Hip09 platform, some new functions are enhanced on L3C PMU:

* tt_req: it is the abbreviation of tracetag request and allows user to
count only read/write/atomic operations. tt_req is 3-bit and details are
listed in the hisi-pmu document.
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5

* tt_core: it is the abbreviation of tracetag core and allows user to
filter by core/thread within the cluster. It is an 8-bit bitmap in which
each bit represents the corresponding core/thread in this L3C.
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0xf/ sleep 5

* datasrc_cfg: it is the abbreviation of data source configuration and
allows user to check where the data comes from, such as: from local DDR,
cross-die DDR or cross-socket DDR. It is 5-bit and represents different
data sources in the SoC.
$# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0xe/ sleep 5

* datasrc_skt: it is the abbreviation of data source from another socket
and is used in multi-chip systems. If the user wants to check the
cross-socket data source, it shall be added in the perf command. Only one
bit is used to control this.
$# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0x10,datasrc_skt=1/ sleep 5

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Co-developed-by: Qi Liu <liuqi115@huawei.com>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Link: https://lore.kernel.org/r/1615186237-22263-5-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
This commit is contained in:
Shaokun Zhang 2021-03-08 14:50:32 +08:00 committed by Will Deacon
parent 3da582df57
commit 486a7f46b9
3 changed files with 258 additions and 20 deletions

View File

@ -23,12 +23,17 @@
#define L3C_INT_MASK 0x0800
#define L3C_INT_STATUS 0x0808
#define L3C_INT_CLEAR 0x080c
#define L3C_CORE_CTRL 0x1b04
#define L3C_TRACETAG_CTRL 0x1b20
#define L3C_DATSRC_TYPE 0x1b48
#define L3C_DATSRC_CTRL 0x1bf0
#define L3C_EVENT_CTRL 0x1c00
#define L3C_VERSION 0x1cf0
#define L3C_EVENT_TYPE0 0x1d00
/*
 * If the HW version only supports a 48-bit counter, then
 * bits [63:48] are reserved, which are Read-As-Zero and
 * Writes-Ignored.
 */
#define L3C_CNTR0_LOWER 0x1e00
@ -36,8 +41,186 @@
/* Number of hardware counters per L3C PMU */
#define L3C_NR_COUNTERS 0x8
#define L3C_PERF_CTRL_EN 0x10000
/* L3C_PERF_CTRL: enable tracetag statistics */
#define L3C_TRACETAG_EN BIT(31)
/* L3C_TRACETAG_CTRL: bit offset of the request-type (tt_req) field */
#define L3C_TRACETAG_REQ_SHIFT 7
/* L3C_TRACETAG_CTRL: enable bits; both filter enables include MARK_EN */
#define L3C_TRACETAG_MARK_EN BIT(0)
#define L3C_TRACETAG_REQ_EN (L3C_TRACETAG_MARK_EN | BIT(2))
#define L3C_TRACETAG_CORE_EN (L3C_TRACETAG_MARK_EN | BIT(3))
/* L3C_PERF_CTRL: enable core filtering */
#define L3C_CORE_EN BIT(20)
/*
 * Value written to L3C_CORE_CTRL to drop the core filter.
 * NOTE(review): "COER" looks like a misspelling of "CORE"; a rename must
 * also update the user in hisi_l3c_pmu_clear_core_tracetag().
 */
#define L3C_COER_NONE 0x0
/* Width mask of one per-counter field in the L3C_DATSRC_TYPE registers */
#define L3C_DATSRC_MASK 0xFF
/* L3C_DATSRC_CTRL: enable cross-socket data source */
#define L3C_DATSRC_SKT_EN BIT(23)
/*
 * Value written back to a counter's data-source field when its filter is
 * cleared.
 * NOTE(review): confirm 0x0 is the hardware's "no filter" value.
 */
#define L3C_DATSRC_NONE 0x0
#define L3C_EVTYPE_NONE 0xff
/* Per-version limits assigned to check_event at probe time */
#define L3C_V1_NR_EVENTS 0x59
#define L3C_V2_NR_EVENTS 0xFF
/*
 * Accessors for the filter fields packed into perf_event_attr::config1.
 * Each line generates a hisi_get_<name>() helper returning bits [hi:lo];
 * the bit ranges match the v2 "format" sysfs attributes.
 */
HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
/*
 * Program request-type tracetag filtering for @event.
 * Nothing is touched when the event's tt_req field is zero.
 */
static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 req_type = hisi_get_tt_req(event);
	u32 reg_val;

	if (!req_type)
		return;

	/* Record the request type and switch on request tracetag marking */
	reg_val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
	reg_val |= (req_type << L3C_TRACETAG_REQ_SHIFT) | L3C_TRACETAG_REQ_EN;
	writel(reg_val, l3c_pmu->base + L3C_TRACETAG_CTRL);

	/* Turn on tracetag statistics in the perf control register */
	reg_val = readl(l3c_pmu->base + L3C_PERF_CTRL);
	reg_val |= L3C_TRACETAG_EN;
	writel(reg_val, l3c_pmu->base + L3C_PERF_CTRL);
}
/*
 * Undo hisi_l3c_pmu_config_req_tracetag() for @event.
 * Nothing is touched when the event's tt_req field is zero.
 */
static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 req_type = hisi_get_tt_req(event);
	u32 reg_val;

	if (!req_type)
		return;

	/* Drop this event's request-type bits and the request enable bits */
	reg_val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
	reg_val &= ~(req_type << L3C_TRACETAG_REQ_SHIFT);
	reg_val &= ~L3C_TRACETAG_REQ_EN;
	writel(reg_val, l3c_pmu->base + L3C_TRACETAG_CTRL);

	/* Turn tracetag statistics back off */
	reg_val = readl(l3c_pmu->base + L3C_PERF_CTRL);
	reg_val &= ~L3C_TRACETAG_EN;
	writel(reg_val, l3c_pmu->base + L3C_PERF_CTRL);
}
/*
 * Write @ds_cfg into the data-source field that belongs to @event's
 * hardware counter.
 *
 * There are two data-source registers for the eight counters, each
 * holding four 8-bit fields: counters 0-3 use the register at
 * L3C_DATSRC_TYPE, counters 4-7 the register 4 bytes above it.
 */
static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u32 reg, reg_idx, shift, val;
	int idx = hwc->idx;

	reg = L3C_DATSRC_TYPE + (idx / 4) * 4;
	reg_idx = idx % 4;
	shift = 8 * reg_idx;

	val = readl(l3c_pmu->base + reg);
	/*
	 * Build the mask as u32: a plain-int 0xFF shifted left by 24
	 * (counters 3 and 7) would overflow a signed int.
	 */
	val &= ~((u32)L3C_DATSRC_MASK << shift);
	val |= ds_cfg << shift;
	writel(val, l3c_pmu->base + reg);
}
/*
 * Apply the data-source filters requested by @event: the per-counter
 * datasrc_cfg value and, if asked for, the cross-socket enable bit.
 */
static void hisi_l3c_pmu_config_ds(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 src_cfg = hisi_get_datasrc_cfg(event);
	u32 cross_skt = hisi_get_datasrc_skt(event);
	u32 ctrl;

	if (src_cfg)
		hisi_l3c_pmu_write_ds(event, src_cfg);

	if (!cross_skt)
		return;

	ctrl = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
	ctrl |= L3C_DATSRC_SKT_EN;
	writel(ctrl, l3c_pmu->base + L3C_DATSRC_CTRL);
}
/*
 * Remove the data-source filters that hisi_l3c_pmu_config_ds() applied
 * for @event.
 */
static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 src_cfg = hisi_get_datasrc_cfg(event);
	u32 cross_skt = hisi_get_datasrc_skt(event);
	u32 ctrl;

	/* Only reset the counter's field if this event had set it */
	if (src_cfg)
		hisi_l3c_pmu_write_ds(event, L3C_DATSRC_NONE);

	if (!cross_skt)
		return;

	ctrl = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
	ctrl &= ~L3C_DATSRC_SKT_EN;
	writel(ctrl, l3c_pmu->base + L3C_DATSRC_CTRL);
}
/*
 * Program core-based tracetag filtering for @event.
 * Nothing is touched when the event's tt_core bitmap is zero.
 */
static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 core_mask = hisi_get_tt_core(event);
	u32 reg_val;

	if (!core_mask)
		return;

	/* Load the core bitmap, then enable core filtering */
	writel(core_mask, l3c_pmu->base + L3C_CORE_CTRL);
	reg_val = readl(l3c_pmu->base + L3C_PERF_CTRL);
	reg_val |= L3C_CORE_EN;
	writel(reg_val, l3c_pmu->base + L3C_PERF_CTRL);

	/* Switch on core tracetag marking */
	reg_val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
	reg_val |= L3C_TRACETAG_CORE_EN;
	writel(reg_val, l3c_pmu->base + L3C_TRACETAG_CTRL);
}
/*
 * Undo hisi_l3c_pmu_config_core_tracetag() for @event.
 * Nothing is touched when the event's tt_core bitmap is zero.
 */
static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
{
	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
	u32 core_mask = hisi_get_tt_core(event);
	u32 reg_val;

	if (!core_mask)
		return;

	/* Wipe the core bitmap, then disable core filtering */
	writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL);
	reg_val = readl(l3c_pmu->base + L3C_PERF_CTRL);
	reg_val &= ~L3C_CORE_EN;
	writel(reg_val, l3c_pmu->base + L3C_PERF_CTRL);

	/* Switch off core tracetag marking */
	reg_val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
	reg_val &= ~L3C_TRACETAG_CORE_EN;
	writel(reg_val, l3c_pmu->base + L3C_TRACETAG_CTRL);
}
/*
 * enable_filter callback: apply every filter encoded in the event's
 * config1. An all-zero config1 means no filter was requested.
 */
static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
{
	if (event->attr.config1 == 0x0)
		return;

	hisi_l3c_pmu_config_req_tracetag(event);
	hisi_l3c_pmu_config_core_tracetag(event);
	hisi_l3c_pmu_config_ds(event);
}
/*
 * disable_filter callback: tear down the filters in the reverse order of
 * hisi_l3c_pmu_enable_filter(). An all-zero config1 means there is
 * nothing to undo.
 */
static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
{
	if (event->attr.config1 == 0x0)
		return;

	hisi_l3c_pmu_clear_ds(event);
	hisi_l3c_pmu_clear_core_tracetag(event);
	hisi_l3c_pmu_clear_req_tracetag(event);
}
/*
* Select the counter register offset using the counter index
@ -50,14 +233,12 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
struct hw_perf_event *hwc)
{
/* Read 64-bits and the upper 16 bits are RAZ */
return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
}
/* Store @val into the counter selected by @hwc->idx. */
static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
				       struct hw_perf_event *hwc, u64 val)
{
	u32 offset = hisi_l3c_pmu_get_counter_offset(hwc->idx);

	/* A single 64-bit access covers the whole counter register */
	writeq(val, l3c_pmu->base + offset);
}
@ -166,23 +347,14 @@ static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx)
/*
 * ACPI hardware IDs handled by this driver. The empty entry terminates
 * the table, so it must come last, after every real ID.
 */
static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
	{ "HISI0213", },
	{ "HISI0214", },
	{}
};
MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
{
unsigned long long id;
acpi_status status;
status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
"_UID", NULL, &id);
if (ACPI_FAILURE(status))
return -EINVAL;
l3c_pmu->index_id = id;
/*
* Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
* SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
@ -220,6 +392,20 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
.attrs = hisi_l3c_pmu_v1_format_attr,
};
/*
 * "format" attributes for v2 hardware: the event code lives in config,
 * the filter fields (tt_core, tt_req, datasrc_cfg, datasrc_skt) in config1.
 * The config1 bit ranges must stay in sync with the
 * HISI_PMU_EVENT_ATTR_EXTRACTOR() accessors for the same fields.
 */
static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),
HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),
HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),
NULL
};
/* Attribute group named "format" carrying the v2 format attributes */
static const struct attribute_group hisi_l3c_pmu_v2_format_group = {
.name = "format",
.attrs = hisi_l3c_pmu_v2_format_attr,
};
static struct attribute *hisi_l3c_pmu_v1_events_attr[] = {
HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00),
HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01),
@ -242,6 +428,19 @@ static const struct attribute_group hisi_l3c_pmu_v1_events_group = {
.attrs = hisi_l3c_pmu_v1_events_attr,
};
/* Named events exposed for v2 hardware, as (name, event code) pairs */
static struct attribute *hisi_l3c_pmu_v2_events_attr[] = {
HISI_PMU_EVENT_ATTR(l3c_hit, 0x48),
HISI_PMU_EVENT_ATTR(cycles, 0x7f),
HISI_PMU_EVENT_ATTR(l3c_ref, 0xb8),
HISI_PMU_EVENT_ATTR(dat_access, 0xb9),
NULL
};
/* Attribute group named "events" carrying the v2 named events */
static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
.name = "events",
.attrs = hisi_l3c_pmu_v2_events_attr,
};
static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
@ -273,6 +472,14 @@ static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
NULL,
};
/*
 * Attribute groups used when the PMU identifier is HISI_PMU_V2 or newer;
 * selected in hisi_l3c_pmu_dev_probe().
 */
static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
&hisi_l3c_pmu_v2_format_group,
&hisi_l3c_pmu_v2_events_group,
&hisi_l3c_pmu_cpumask_attr_group,
&hisi_l3c_pmu_identifier_group,
NULL
};
static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
.write_evtype = hisi_l3c_pmu_write_evtype,
.get_event_idx = hisi_uncore_pmu_get_event_idx,
@ -286,6 +493,8 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
.read_counter = hisi_l3c_pmu_read_counter,
.get_int_status = hisi_l3c_pmu_get_int_status,
.clear_int_status = hisi_l3c_pmu_clear_int_status,
.enable_filter = hisi_l3c_pmu_enable_filter,
.disable_filter = hisi_l3c_pmu_disable_filter,
};
static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
@ -301,12 +510,20 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
if (ret)
return ret;
l3c_pmu->num_counters = L3C_NR_COUNTERS;
if (l3c_pmu->identifier >= HISI_PMU_V2) {
l3c_pmu->counter_bits = 64;
l3c_pmu->check_event = L3C_V2_NR_EVENTS;
l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups;
} else {
l3c_pmu->counter_bits = 48;
l3c_pmu->check_event = L3C_V1_NR_EVENTS;
l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups;
}
l3c_pmu->num_counters = L3C_NR_COUNTERS;
l3c_pmu->ops = &hisi_uncore_l3c_ops;
l3c_pmu->dev = &pdev->dev;
l3c_pmu->on_cpu = -1;
l3c_pmu->check_event = L3C_V1_NR_EVENTS;
return 0;
}
@ -334,8 +551,12 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
return ret;
}
/*
* CCL_ID is used to identify the L3C in the same SCCL which was
* used _UID by mistake.
*/
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
l3c_pmu->sccl_id, l3c_pmu->index_id);
l3c_pmu->sccl_id, l3c_pmu->ccl_id);
l3c_pmu->pmu = (struct pmu) {
.name = name,
.module = THIS_MODULE,
@ -348,7 +569,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
.start = hisi_uncore_pmu_start,
.stop = hisi_uncore_pmu_stop,
.read = hisi_uncore_pmu_read,
.attr_groups = hisi_l3c_pmu_v1_attr_groups,
.attr_groups = l3c_pmu->pmu_events.attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};

View File

@ -21,7 +21,7 @@
#include "hisi_uncore_pmu.h"
#define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
/*
 * Largest value representable in an @nr-bit counter. Built with
 * GENMASK_ULL() rather than BIT_ULL(nr) - 1 so that nr == 64 does not
 * shift by the full width of the type.
 */
#define HISI_MAX_PERIOD(nr)	(GENMASK_ULL((nr) - 1, 0))
/*
* PMU format attributes
@ -245,6 +245,9 @@ static void hisi_uncore_pmu_enable_event(struct perf_event *event)
hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
HISI_GET_EVENTID(event));
if (hisi_pmu->ops->enable_filter)
hisi_pmu->ops->enable_filter(event);
hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
}
@ -259,6 +262,9 @@ static void hisi_uncore_pmu_disable_event(struct perf_event *event)
hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);
if (hisi_pmu->ops->disable_filter)
hisi_pmu->ops->disable_filter(event);
}
void hisi_uncore_pmu_set_event_period(struct perf_event *event)

View File

@ -11,6 +11,7 @@
#ifndef __HISI_UNCORE_PMU_H__
#define __HISI_UNCORE_PMU_H__
#include <linux/bitfield.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/kernel.h>
@ -22,6 +23,7 @@
#undef pr_fmt
#define pr_fmt(fmt) "hisi_pmu: " fmt
#define HISI_PMU_V2 0x30
#define HISI_MAX_COUNTERS 0x10
#define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu))
@ -35,6 +37,12 @@
#define HISI_PMU_EVENT_ATTR(_name, _config) \
HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
/*
 * Generate hisi_get_<name>(), which returns bits [hi:lo] of the
 * perf_event_attr member named by @attr_field (e.g. config1) for an event.
 */
#define HISI_PMU_EVENT_ATTR_EXTRACTOR(name, attr_field, hi, lo)		\
	static inline u32 hisi_get_##name(struct perf_event *event)	\
	{								\
		return FIELD_GET(GENMASK_ULL(hi, lo),			\
				 event->attr.attr_field);		\
	}
struct hisi_pmu;
struct hisi_uncore_ops {
@ -50,11 +58,14 @@ struct hisi_uncore_ops {
void (*stop_counters)(struct hisi_pmu *);
u32 (*get_int_status)(struct hisi_pmu *hisi_pmu);
void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx);
void (*enable_filter)(struct perf_event *event);
void (*disable_filter)(struct perf_event *event);
};
/* Per-PMU event bookkeeping */
struct hisi_pmu_hwevents {
/* presumably one slot per hardware counter index — TODO confirm */
struct perf_event *hw_events[HISI_MAX_COUNTERS];
/* presumably marks which counter indices are allocated — TODO confirm */
DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
/* sysfs attribute groups chosen per hardware version and passed to struct pmu */
const struct attribute_group **attr_groups;
};
/* Generic pmu struct for different pmu types */