2019-06-04 16:11:33 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2013-04-20 05:34:28 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2013 Advanced Micro Devices, Inc.
|
|
|
|
*
|
|
|
|
* Author: Jacob Shin <jacob.shin@amd.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/perf_event.h>
|
|
|
|
#include <linux/percpu.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/cpumask.h>
|
|
|
|
|
|
|
|
#include <asm/cpufeature.h>
|
|
|
|
#include <asm/perf_event.h>
|
|
|
|
#include <asm/msr.h>
|
2018-04-28 05:34:35 +08:00
|
|
|
#include <asm/smp.h>
|
2013-04-20 05:34:28 +08:00
|
|
|
|
|
|
|
/*
 * Counter counts for the NB, L2 and L3 uncore units.
 * NOTE(review): presumably copied into num_counters_nb / num_counters_llc
 * by init code that is not visible in this part of the file - confirm.
 */
#define NUM_COUNTERS_NB 4
|
|
|
|
#define NUM_COUNTERS_L2 4
|
2017-01-17 07:36:22 +08:00
|
|
|
#define NUM_COUNTERS_L3 6
|
|
|
|
/* Capacity of amd_uncore.events[]; must cover the largest count above. */
#define MAX_COUNTERS 6
|
2013-04-20 05:34:28 +08:00
|
|
|
|
|
|
|
/*
 * First RDPMC index of each unit. amd_uncore_add() adds the counter slot
 * index to this base to obtain hwc->event_base_rdpmc.
 */
#define RDPMC_BASE_NB 6
|
2017-01-17 07:36:21 +08:00
|
|
|
#define RDPMC_BASE_LLC 10
|
2013-04-20 05:34:28 +08:00
|
|
|
|
|
|
|
/*
 * amd_uncore_read() shifts both raw counter values left by this amount and
 * the difference arithmetically right again, so only the low
 * (64 - COUNTER_SHIFT) bits of the hardware value contribute to the delta.
 */
#define COUNTER_SHIFT 16
|
|
|
|
|
2017-04-10 20:20:47 +08:00
|
|
|
/* Prefix every pr_*() message emitted from this file with "amd_uncore: ". */
#undef pr_fmt
|
|
|
|
#define pr_fmt(fmt) "amd_uncore: " fmt
|
|
|
|
|
2017-01-17 07:36:22 +08:00
|
|
|
/* Copied into amd_uncore.num_counters of every LLC instance at allocation. */
static int num_counters_llc;
|
|
|
|
/* Copied into amd_uncore.num_counters of every NB instance at allocation. */
static int num_counters_nb;
|
2018-09-27 23:51:55 +08:00
|
|
|
/*
 * When true, amd_uncore_event_init() ORs the L3 SliceMask/ThreadMask bits
 * into the event config.
 */
static bool l3_mask;
|
2017-01-17 07:36:22 +08:00
|
|
|
|
perf/x86/amd/uncore: Prevent use after free
The recent conversion of the cpu hotplug support in the uncore driver
introduced a regression due to the way the callbacks are invoked at
initialization time.
The old code called the prepare/starting/online function on each online cpu
as a block. The new code registers the hotplug callbacks in the core for
each state. The core invokes the callbacks at each registration on all
online cpus.
The code implicitly relied on the prepare/starting/online callbacks being
called as a combo on a particular cpu, which was not obvious and completely
undocumented.
The resulting subtle wreckage happens due to the way the uncore code
manages shared data structures for cpus which share an uncore resource in
hardware. The sharing is determined in the cpu starting callback, but the
prepare callback allocates per cpu data for the upcoming cpu because
potential sharing is unknown at this point. If the starting callback finds
an online cpu which shares the hardware resource it takes a refcount on the
percpu data of that cpu and puts its own data structure into a
'free_at_online' pointer of that shared data structure. The online callback
frees that.
With the old model this worked because in a starting callback only one non
unused structure (the one of the starting cpu) was available. The new code
allocates the data structures for all cpus when the prepare callback is
registered.
Now the starting function iterates through all online cpus and looks for a
data structure (skipping its own) which has a matching hardware id. The id
member of the data structure is initialized to 0, but the hardware id can
be 0 as well. The resulting wreckage is:
CPU0 finds a matching id on CPU1, takes a refcount on CPU1 data and puts
its own data structure into CPU1s data structure to be freed.
CPU1 skips CPU0 because the data structure is its allegedly unused own.
It finds a matching id on CPU2, takes a refcount on CPU2 data and puts
its own data structure into CPU2s data structure to be freed.
....
Now the online callbacks are invoked.
CPU0 has a pointer to CPU1s data and frees the original CPU0 data. So
far so good.
CPU1 has a pointer to CPU2s data and frees the original CPU1 data, which
is still referenced by CPU0 ---> Booom
So there are two issues to be solved here:
1) The id field must be initialized at allocation time to a value which
cannot be a valid hardware id, i.e. -1
This prevents the above scenario, but now CPU1 and CPU2 both stick their
own data structure into the free_at_online pointer of CPU0. So we leak
CPU1s data structure.
2) Fix the memory leak described in #1
Instead of having a single pointer, use a hlist to enqueue the
superfluous data structures which are then freed by the first cpu
invoking the online callback.
Ideally we should know the sharing _before_ invoking the prepare callback,
but that's way beyond the scope of this bug fix.
[ tglx: Rewrote changelog ]
Fixes: 96b2bd3866a0 ("perf/x86/amd/uncore: Convert to hotplug state machine")
Reported-and-tested-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20160909160822.lowgmkdwms2dheyv@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-09-10 00:08:23 +08:00
|
|
|
/*
 * Per-CPU structures that turned out to be redundant because a sibling CPU
 * already owns an instance with the same hardware id. They are queued here
 * by amd_uncore_find_online_sibling() and freed by uncore_clean_online().
 */
static HLIST_HEAD(uncore_unused_list);
|
|
|
|
|
2013-04-20 05:34:28 +08:00
|
|
|
struct amd_uncore {
|
|
|
|
int id;
|
|
|
|
int refcnt;
|
|
|
|
int cpu;
|
|
|
|
int num_counters;
|
|
|
|
int rdpmc_base;
|
|
|
|
u32 msr_base;
|
|
|
|
cpumask_t *active_mask;
|
|
|
|
struct pmu *pmu;
|
|
|
|
struct perf_event *events[MAX_COUNTERS];
|
perf/x86/amd/uncore: Prevent use after free
The resent conversion of the cpu hotplug support in the uncore driver
introduced a regression due to the way the callbacks are invoked at
initialization time.
The old code called the prepare/starting/online function on each online cpu
as a block. The new code registers the hotplug callbacks in the core for
each state. The core invokes the callbacks at each registration on all
online cpus.
The code implicitely relied on the prepare/starting/online callbacks being
called as combo on a particular cpu, which was not obvious and completely
undocumented.
The resulting subtle wreckage happens due to the way how the uncore code
manages shared data structures for cpus which share an uncore resource in
hardware. The sharing is determined in the cpu starting callback, but the
prepare callback allocates per cpu data for the upcoming cpu because
potential sharing is unknown at this point. If the starting callback finds
a online cpu which shares the hardware resource it takes a refcount on the
percpu data of that cpu and puts the own data structure into a
'free_at_online' pointer of that shared data structure. The online callback
frees that.
With the old model this worked because in a starting callback only one non
unused structure (the one of the starting cpu) was available. The new code
allocates the data structures for all cpus when the prepare callback is
registered.
Now the starting function iterates through all online cpus and looks for a
data structure (skipping its own) which has a matching hardware id. The id
member of the data structure is initialized to 0, but the hardware id can
be 0 as well. The resulting wreckage is:
CPU0 finds a matching id on CPU1, takes a refcount on CPU1 data and puts
its own data structure into CPU1s data structure to be freed.
CPU1 skips CPU0 because the data structure is its allegedly unsued own.
It finds a matching id on CPU2, takes a refcount on CPU1 data and puts
its own data structure into CPU2s data structure to be freed.
....
Now the online callbacks are invoked.
CPU0 has a pointer to CPU1s data and frees the original CPU0 data. So
far so good.
CPU1 has a pointer to CPU2s data and frees the original CPU1 data, which
is still referenced by CPU0 ---> Booom
So there are two issues to be solved here:
1) The id field must be initialized at allocation time to a value which
cannot be a valid hardware id, i.e. -1
This prevents the above scenario, but now CPU1 and CPU2 both stick their
own data structure into the free_at_online pointer of CPU0. So we leak
CPU1s data structure.
2) Fix the memory leak described in #1
Instead of having a single pointer, use a hlist to enqueue the
superflous data structures which are then freed by the first cpu
invoking the online callback.
Ideally we should know the sharing _before_ invoking the prepare callback,
but that's way beyond the scope of this bug fix.
[ tglx: Rewrote changelog ]
Fixes: 96b2bd3866a0 ("perf/x86/amd/uncore: Convert to hotplug state machine")
Reported-and-tested-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20160909160822.lowgmkdwms2dheyv@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-09-10 00:08:23 +08:00
|
|
|
struct hlist_node node;
|
2013-04-20 05:34:28 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Per-CPU slots holding a pointer to the NB instance used by each CPU.
 * The base pointer is checked for NULL before use throughout this file.
 */
static struct amd_uncore * __percpu *amd_uncore_nb;
|
2017-01-17 07:36:21 +08:00
|
|
|
/* Same as amd_uncore_nb, for the last level cache unit. */
static struct amd_uncore * __percpu *amd_uncore_llc;
|
2013-04-20 05:34:28 +08:00
|
|
|
|
|
|
|
/* Forward declarations: the helpers below compare against these PMUs' type. */
static struct pmu amd_nb_pmu;
|
2017-01-17 07:36:21 +08:00
|
|
|
static struct pmu amd_llc_pmu;
|
2013-04-20 05:34:28 +08:00
|
|
|
|
|
|
|
/*
 * CPU masks reported through the "cpumask" sysfs attribute of each PMU and
 * referenced from amd_uncore.active_mask.
 */
static cpumask_t amd_nb_active_mask;
|
2017-01-17 07:36:21 +08:00
|
|
|
static cpumask_t amd_llc_active_mask;
|
2013-04-20 05:34:28 +08:00
|
|
|
|
|
|
|
static bool is_nb_event(struct perf_event *event)
|
|
|
|
{
|
|
|
|
return event->pmu->type == amd_nb_pmu.type;
|
|
|
|
}
|
|
|
|
|
2017-01-17 07:36:21 +08:00
|
|
|
static bool is_llc_event(struct perf_event *event)
|
2013-04-20 05:34:28 +08:00
|
|
|
{
|
2017-01-17 07:36:21 +08:00
|
|
|
return event->pmu->type == amd_llc_pmu.type;
|
2013-04-20 05:34:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
|
|
|
|
{
|
|
|
|
if (is_nb_event(event) && amd_uncore_nb)
|
|
|
|
return *per_cpu_ptr(amd_uncore_nb, event->cpu);
|
2017-01-17 07:36:21 +08:00
|
|
|
else if (is_llc_event(event) && amd_uncore_llc)
|
|
|
|
return *per_cpu_ptr(amd_uncore_llc, event->cpu);
|
2013-04-20 05:34:28 +08:00
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void amd_uncore_read(struct perf_event *event)
|
|
|
|
{
|
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
u64 prev, new;
|
|
|
|
s64 delta;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* since we do not enable counter overflow interrupts,
|
|
|
|
* we do not have to worry about prev_count changing on us
|
|
|
|
*/
|
|
|
|
|
|
|
|
prev = local64_read(&hwc->prev_count);
|
|
|
|
rdpmcl(hwc->event_base_rdpmc, new);
|
|
|
|
local64_set(&hwc->prev_count, new);
|
|
|
|
delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
|
|
|
|
delta >>= COUNTER_SHIFT;
|
|
|
|
local64_add(delta, &event->count);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void amd_uncore_start(struct perf_event *event, int flags)
|
|
|
|
{
|
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
|
|
|
|
if (flags & PERF_EF_RELOAD)
|
|
|
|
wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
|
|
|
|
|
|
|
|
hwc->state = 0;
|
|
|
|
wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
|
|
|
|
perf_event_update_userpage(event);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void amd_uncore_stop(struct perf_event *event, int flags)
|
|
|
|
{
|
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
|
|
|
|
wrmsrl(hwc->config_base, hwc->config);
|
|
|
|
hwc->state |= PERF_HES_STOPPED;
|
|
|
|
|
|
|
|
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
|
|
|
|
amd_uncore_read(event);
|
|
|
|
hwc->state |= PERF_HES_UPTODATE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int amd_uncore_add(struct perf_event *event, int flags)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct amd_uncore *uncore = event_to_amd_uncore(event);
|
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
|
|
|
|
/* are we already assigned? */
|
|
|
|
if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
for (i = 0; i < uncore->num_counters; i++) {
|
|
|
|
if (uncore->events[i] == event) {
|
|
|
|
hwc->idx = i;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if not, take the first available counter */
|
|
|
|
hwc->idx = -1;
|
|
|
|
for (i = 0; i < uncore->num_counters; i++) {
|
|
|
|
if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
|
|
|
|
hwc->idx = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (hwc->idx == -1)
|
|
|
|
return -EBUSY;
|
|
|
|
|
|
|
|
hwc->config_base = uncore->msr_base + (2 * hwc->idx);
|
|
|
|
hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
|
|
|
|
hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
|
|
|
|
hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
|
|
|
|
|
|
|
if (flags & PERF_EF_START)
|
|
|
|
amd_uncore_start(event, PERF_EF_RELOAD);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void amd_uncore_del(struct perf_event *event, int flags)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct amd_uncore *uncore = event_to_amd_uncore(event);
|
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
|
|
|
|
amd_uncore_stop(event, PERF_EF_UPDATE);
|
|
|
|
|
|
|
|
for (i = 0; i < uncore->num_counters; i++) {
|
|
|
|
if (cmpxchg(&uncore->events[i], event, NULL) == event)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
hwc->idx = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int amd_uncore_event_init(struct perf_event *event)
|
|
|
|
{
|
|
|
|
struct amd_uncore *uncore;
|
|
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
|
|
|
|
if (event->attr.type != event->pmu->type)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
/*
|
2017-01-17 07:36:21 +08:00
|
|
|
* NB and Last level cache counters (MSRs) are shared across all cores
|
|
|
|
* that share the same NB / Last level cache. Interrupts can be directed
|
|
|
|
* to a single target core, however, event counts generated by processes
|
|
|
|
* running on other cores cannot be masked out. So we do not support
|
|
|
|
* sampling and per-thread events.
|
2013-04-20 05:34:28 +08:00
|
|
|
*/
|
|
|
|
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* and we do not enable counter overflow interrupts */
|
|
|
|
hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
|
|
|
|
hwc->idx = -1;
|
|
|
|
|
2018-09-27 23:51:55 +08:00
|
|
|
/*
|
|
|
|
* SliceMask and ThreadMask need to be set for certain L3 events in
|
|
|
|
* Family 17h. For other events, the two fields do not affect the count.
|
|
|
|
*/
|
|
|
|
if (l3_mask)
|
|
|
|
hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK);
|
|
|
|
|
2013-04-20 05:34:28 +08:00
|
|
|
if (event->cpu < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
uncore = event_to_amd_uncore(event);
|
|
|
|
if (!uncore)
|
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* since request can come in to any of the shared cores, we will remap
|
|
|
|
* to a single common cpu.
|
|
|
|
*/
|
|
|
|
event->cpu = uncore->cpu;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
|
|
|
|
struct device_attribute *attr,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
cpumask_t *active_mask;
|
|
|
|
struct pmu *pmu = dev_get_drvdata(dev);
|
|
|
|
|
|
|
|
if (pmu->type == amd_nb_pmu.type)
|
|
|
|
active_mask = &amd_nb_active_mask;
|
2017-01-17 07:36:21 +08:00
|
|
|
else if (pmu->type == amd_llc_pmu.type)
|
|
|
|
active_mask = &amd_llc_active_mask;
|
2013-04-20 05:34:28 +08:00
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
|
2014-09-30 21:48:22 +08:00
|
|
|
return cpumap_print_to_pagebuf(true, buf, active_mask);
|
2013-04-20 05:34:28 +08:00
|
|
|
}
|
|
|
|
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
|
|
|
|
|
|
|
|
static struct attribute *amd_uncore_attrs[] = {
|
|
|
|
&dev_attr_cpumask.attr,
|
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct attribute_group amd_uncore_attr_group = {
|
|
|
|
.attrs = amd_uncore_attrs,
|
|
|
|
};
|
|
|
|
|
2017-01-17 07:36:23 +08:00
|
|
|
/*
 * Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based
 * on family.
 *
 * Defines a show routine named <_dev>_show<_name> that prints @_format and a
 * device attribute named format_attr_<_dev><_name>. The attribute itself is
 * built with __ATTR_RO(_dev), i.e. from @_dev alone, whatever @_name is.
 * NOTE(review): presumably init code re-points the suffixed attributes per
 * family; that code is not visible in this part of the file - confirm.
 */
#define AMD_FORMAT_ATTR(_dev, _name, _format)				     \
static ssize_t								     \
_dev##_show##_name(struct device *dev,					     \
		struct device_attribute *attr,				     \
		char *page)						     \
{									     \
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			     \
	return sprintf(page, _format "\n");				     \
}									     \
static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev);
|
|
|
|
|
|
|
|
/*
 * Used for each uncore counter type.
 *
 * For a given @_name this emits:
 *   - amd_uncore_format_attr_<_name>[]: the "event" attribute variant for
 *     that type plus the common "umask" attribute
 *   - amd_uncore_format_group_<_name>: the "format" group holding them
 *   - amd_uncore_attr_groups_<_name>[]: the NULL-terminated list of the
 *     common cpumask group and the format group
 */
#define AMD_ATTRIBUTE(_name)						     \
static struct attribute *amd_uncore_format_attr_##_name[] = {		     \
	&format_attr_event_##_name.attr,				     \
	&format_attr_umask.attr,					     \
	NULL,								     \
};									     \
static struct attribute_group amd_uncore_format_group_##_name = {	     \
	.name = "format",						     \
	.attrs = amd_uncore_format_attr_##_name,			     \
};									     \
static const struct attribute_group *amd_uncore_attr_groups_##_name[] = {    \
	&amd_uncore_attr_group,						     \
	&amd_uncore_format_group_##_name,				     \
	NULL,								     \
};
|
|
|
|
|
2017-01-17 07:36:23 +08:00
|
|
|
AMD_FORMAT_ATTR(event, , "config:0-7,32-35");
|
|
|
|
AMD_FORMAT_ATTR(umask, , "config:8-15");
|
|
|
|
AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60");
|
|
|
|
AMD_FORMAT_ATTR(event, _l3, "config:0-7");
|
|
|
|
AMD_ATTRIBUTE(df);
|
|
|
|
AMD_ATTRIBUTE(l3);
|
2013-04-20 05:34:28 +08:00
|
|
|
|
|
|
|
static struct pmu amd_nb_pmu = {
|
2016-04-04 22:02:08 +08:00
|
|
|
.task_ctx_nr = perf_invalid_context,
|
2013-04-20 05:34:28 +08:00
|
|
|
.event_init = amd_uncore_event_init,
|
|
|
|
.add = amd_uncore_add,
|
|
|
|
.del = amd_uncore_del,
|
|
|
|
.start = amd_uncore_start,
|
|
|
|
.stop = amd_uncore_stop,
|
|
|
|
.read = amd_uncore_read,
|
2019-01-10 21:53:33 +08:00
|
|
|
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
|
2013-04-20 05:34:28 +08:00
|
|
|
};
|
|
|
|
|
2017-01-17 07:36:21 +08:00
|
|
|
static struct pmu amd_llc_pmu = {
|
2016-04-04 22:02:08 +08:00
|
|
|
.task_ctx_nr = perf_invalid_context,
|
2013-04-20 05:34:28 +08:00
|
|
|
.event_init = amd_uncore_event_init,
|
|
|
|
.add = amd_uncore_add,
|
|
|
|
.del = amd_uncore_del,
|
|
|
|
.start = amd_uncore_start,
|
|
|
|
.stop = amd_uncore_stop,
|
|
|
|
.read = amd_uncore_read,
|
2019-01-10 21:53:33 +08:00
|
|
|
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
|
2013-04-20 05:34:28 +08:00
|
|
|
};
|
|
|
|
|
x86: delete __cpuinit usage from all x86 files
The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications. For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.
After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out. Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.
Note that some harmless section mismatch warnings may result, since
notify_cpu_starting() and cpu_up() are arch independent (kernel/cpu.c)
are flagged as __cpuinit -- so if we remove the __cpuinit from
arch specific callers, we will also get section mismatch warnings.
As an intermediate step, we intend to turn the linux/init.h cpuinit
content into no-ops as early as possible, since that will get rid
of these warnings. In any case, they are temporary and harmless.
This removes all the arch/x86 uses of the __cpuinit macros from
all C files. x86 only had the one __CPUINIT used in assembly files,
and it wasn't paired off with a .previous or a __FINIT, so we can
delete it directly w/o any corresponding additional change there.
[1] https://lkml.org/lkml/2013/5/20/589
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2013-06-19 06:23:59 +08:00
|
|
|
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
|
2013-04-20 05:34:28 +08:00
|
|
|
{
|
|
|
|
return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
|
|
|
|
cpu_to_node(cpu));
|
|
|
|
}
|
|
|
|
|
2014-06-11 12:09:03 +08:00
|
|
|
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
|
2013-04-20 05:34:28 +08:00
|
|
|
{
|
2017-01-17 07:36:21 +08:00
|
|
|
struct amd_uncore *uncore_nb = NULL, *uncore_llc;
|
2013-04-20 05:34:28 +08:00
|
|
|
|
|
|
|
if (amd_uncore_nb) {
|
2014-06-11 12:09:03 +08:00
|
|
|
uncore_nb = amd_uncore_alloc(cpu);
|
|
|
|
if (!uncore_nb)
|
|
|
|
goto fail;
|
|
|
|
uncore_nb->cpu = cpu;
|
2017-01-17 07:36:22 +08:00
|
|
|
uncore_nb->num_counters = num_counters_nb;
|
2014-06-11 12:09:03 +08:00
|
|
|
uncore_nb->rdpmc_base = RDPMC_BASE_NB;
|
|
|
|
uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
|
|
|
|
uncore_nb->active_mask = &amd_nb_active_mask;
|
|
|
|
uncore_nb->pmu = &amd_nb_pmu;
|
perf/x86/amd/uncore: Prevent use after free
The resent conversion of the cpu hotplug support in the uncore driver
introduced a regression due to the way the callbacks are invoked at
initialization time.
The old code called the prepare/starting/online function on each online cpu
as a block. The new code registers the hotplug callbacks in the core for
each state. The core invokes the callbacks at each registration on all
online cpus.
The code implicitely relied on the prepare/starting/online callbacks being
called as combo on a particular cpu, which was not obvious and completely
undocumented.
The resulting subtle wreckage happens due to the way how the uncore code
manages shared data structures for cpus which share an uncore resource in
hardware. The sharing is determined in the cpu starting callback, but the
prepare callback allocates per cpu data for the upcoming cpu because
potential sharing is unknown at this point. If the starting callback finds
a online cpu which shares the hardware resource it takes a refcount on the
percpu data of that cpu and puts the own data structure into a
'free_at_online' pointer of that shared data structure. The online callback
frees that.
With the old model this worked because in a starting callback only one non
unused structure (the one of the starting cpu) was available. The new code
allocates the data structures for all cpus when the prepare callback is
registered.
Now the starting function iterates through all online cpus and looks for a
data structure (skipping its own) which has a matching hardware id. The id
member of the data structure is initialized to 0, but the hardware id can
be 0 as well. The resulting wreckage is:
CPU0 finds a matching id on CPU1, takes a refcount on CPU1 data and puts
its own data structure into CPU1s data structure to be freed.
CPU1 skips CPU0 because the data structure is its allegedly unsued own.
It finds a matching id on CPU2, takes a refcount on CPU1 data and puts
its own data structure into CPU2s data structure to be freed.
....
Now the online callbacks are invoked.
CPU0 has a pointer to CPU1s data and frees the original CPU0 data. So
far so good.
CPU1 has a pointer to CPU2s data and frees the original CPU1 data, which
is still referenced by CPU0 ---> Booom
So there are two issues to be solved here:
1) The id field must be initialized at allocation time to a value which
cannot be a valid hardware id, i.e. -1
This prevents the above scenario, but now CPU1 and CPU2 both stick their
own data structure into the free_at_online pointer of CPU0. So we leak
CPU1s data structure.
2) Fix the memory leak described in #1
Instead of having a single pointer, use a hlist to enqueue the
superflous data structures which are then freed by the first cpu
invoking the online callback.
Ideally we should know the sharing _before_ invoking the prepare callback,
but that's way beyond the scope of this bug fix.
[ tglx: Rewrote changelog ]
Fixes: 96b2bd3866a0 ("perf/x86/amd/uncore: Convert to hotplug state machine")
Reported-and-tested-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20160909160822.lowgmkdwms2dheyv@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-09-10 00:08:23 +08:00
|
|
|
uncore_nb->id = -1;
|
2014-06-11 12:09:03 +08:00
|
|
|
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
|
2013-04-20 05:34:28 +08:00
|
|
|
}
|
|
|
|
|
2017-01-17 07:36:21 +08:00
|
|
|
if (amd_uncore_llc) {
|
|
|
|
uncore_llc = amd_uncore_alloc(cpu);
|
|
|
|
if (!uncore_llc)
|
2014-06-11 12:09:03 +08:00
|
|
|
goto fail;
|
2017-01-17 07:36:21 +08:00
|
|
|
uncore_llc->cpu = cpu;
|
2017-01-17 07:36:22 +08:00
|
|
|
uncore_llc->num_counters = num_counters_llc;
|
2017-01-17 07:36:21 +08:00
|
|
|
uncore_llc->rdpmc_base = RDPMC_BASE_LLC;
|
|
|
|
uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
|
|
|
|
uncore_llc->active_mask = &amd_llc_active_mask;
|
|
|
|
uncore_llc->pmu = &amd_llc_pmu;
|
|
|
|
uncore_llc->id = -1;
|
|
|
|
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
|
2013-04-20 05:34:28 +08:00
|
|
|
}
|
2014-06-11 12:09:03 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
fail:
|
2016-02-17 06:04:41 +08:00
|
|
|
if (amd_uncore_nb)
|
|
|
|
*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
|
2014-06-11 12:09:03 +08:00
|
|
|
kfree(uncore_nb);
|
|
|
|
return -ENOMEM;
|
2013-04-20 05:34:28 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct amd_uncore *
|
x86: delete __cpuinit usage from all x86 files
The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications. For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.
After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out. Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.
Note that some harmless section mismatch warnings may result, since
notify_cpu_starting() and cpu_up() are arch independent (kernel/cpu.c)
are flagged as __cpuinit -- so if we remove the __cpuinit from
arch specific callers, we will also get section mismatch warnings.
As an intermediate step, we intend to turn the linux/init.h cpuinit
content into no-ops as early as possible, since that will get rid
of these warnings. In any case, they are temporary and harmless.
This removes all the arch/x86 uses of the __cpuinit macros from
all C files. x86 only had the one __CPUINIT used in assembly files,
and it wasn't paired off with a .previous or a __FINIT, so we can
delete it directly w/o any corresponding additional change there.
[1] https://lkml.org/lkml/2013/5/20/589
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2013-06-19 06:23:59 +08:00
|
|
|
amd_uncore_find_online_sibling(struct amd_uncore *this,
|
|
|
|
struct amd_uncore * __percpu *uncores)
|
2013-04-20 05:34:28 +08:00
|
|
|
{
|
|
|
|
unsigned int cpu;
|
|
|
|
struct amd_uncore *that;
|
|
|
|
|
|
|
|
for_each_online_cpu(cpu) {
|
|
|
|
that = *per_cpu_ptr(uncores, cpu);
|
|
|
|
|
|
|
|
if (!that)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (this == that)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (this->id == that->id) {
|
perf/x86/amd/uncore: Prevent use after free
The resent conversion of the cpu hotplug support in the uncore driver
introduced a regression due to the way the callbacks are invoked at
initialization time.
The old code called the prepare/starting/online function on each online cpu
as a block. The new code registers the hotplug callbacks in the core for
each state. The core invokes the callbacks at each registration on all
online cpus.
The code implicitely relied on the prepare/starting/online callbacks being
called as combo on a particular cpu, which was not obvious and completely
undocumented.
The resulting subtle wreckage happens due to the way how the uncore code
manages shared data structures for cpus which share an uncore resource in
hardware. The sharing is determined in the cpu starting callback, but the
prepare callback allocates per cpu data for the upcoming cpu because
potential sharing is unknown at this point. If the starting callback finds
a online cpu which shares the hardware resource it takes a refcount on the
percpu data of that cpu and puts the own data structure into a
'free_at_online' pointer of that shared data structure. The online callback
frees that.
With the old model this worked because in a starting callback only one non
unused structure (the one of the starting cpu) was available. The new code
allocates the data structures for all cpus when the prepare callback is
registered.
Now the starting function iterates through all online cpus and looks for a
data structure (skipping its own) which has a matching hardware id. The id
member of the data structure is initialized to 0, but the hardware id can
be 0 as well. The resulting wreckage is:
CPU0 finds a matching id on CPU1, takes a refcount on CPU1 data and puts
its own data structure into CPU1s data structure to be freed.
CPU1 skips CPU0 because the data structure is its allegedly unsued own.
It finds a matching id on CPU2, takes a refcount on CPU1 data and puts
its own data structure into CPU2s data structure to be freed.
....
Now the online callbacks are invoked.
CPU0 has a pointer to CPU1s data and frees the original CPU0 data. So
far so good.
CPU1 has a pointer to CPU2s data and frees the original CPU1 data, which
is still referenced by CPU0 ---> Booom
So there are two issues to be solved here:
1) The id field must be initialized at allocation time to a value which
cannot be a valid hardware id, i.e. -1
This prevents the above scenario, but now CPU1 and CPU2 both stick their
own data structure into the free_at_online pointer of CPU0. So we leak
CPU1s data structure.
2) Fix the memory leak described in #1
Instead of having a single pointer, use a hlist to enqueue the
superflous data structures which are then freed by the first cpu
invoking the online callback.
Ideally we should know the sharing _before_ invoking the prepare callback,
but that's way beyond the scope of this bug fix.
[ tglx: Rewrote changelog ]
Fixes: 96b2bd3866a0 ("perf/x86/amd/uncore: Convert to hotplug state machine")
Reported-and-tested-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20160909160822.lowgmkdwms2dheyv@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-09-10 00:08:23 +08:00
|
|
|
hlist_add_head(&this->node, &uncore_unused_list);
|
2013-04-20 05:34:28 +08:00
|
|
|
this = that;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
this->refcnt++;
|
|
|
|
return this;
|
|
|
|
}
|
|
|
|
|
2016-07-14 01:16:13 +08:00
|
|
|
static int amd_uncore_cpu_starting(unsigned int cpu)
|
2013-04-20 05:34:28 +08:00
|
|
|
{
|
|
|
|
unsigned int eax, ebx, ecx, edx;
|
|
|
|
struct amd_uncore *uncore;
|
|
|
|
|
|
|
|
if (amd_uncore_nb) {
|
|
|
|
uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
|
|
|
|
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
|
|
|
|
uncore->id = ecx & 0xff;
|
|
|
|
|
|
|
|
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
|
|
|
|
*per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
|
|
|
|
}
|
|
|
|
|
2017-01-17 07:36:21 +08:00
|
|
|
if (amd_uncore_llc) {
|
|
|
|
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
|
2018-04-28 05:34:35 +08:00
|
|
|
uncore->id = per_cpu(cpu_llc_id, cpu);
|
2013-04-20 05:34:28 +08:00
|
|
|
|
2017-01-17 07:36:21 +08:00
|
|
|
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
|
|
|
|
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
|
2013-04-20 05:34:28 +08:00
|
|
|
}
|
2016-07-14 01:16:13 +08:00
|
|
|
|
|
|
|
return 0;
|
2013-04-20 05:34:28 +08:00
|
|
|
}
|
|
|
|
|
perf/x86/amd/uncore: Prevent use after free
The recent conversion of the cpu hotplug support in the uncore driver
introduced a regression due to the way the callbacks are invoked at
initialization time.
The old code called the prepare/starting/online function on each online cpu
as a block. The new code registers the hotplug callbacks in the core for
each state. The core invokes the callbacks at each registration on all
online cpus.
The code implicitly relied on the prepare/starting/online callbacks being
called as a combo on a particular cpu, which was not obvious and completely
undocumented.
The resulting subtle wreckage happens due to the way the uncore code
manages shared data structures for cpus which share an uncore resource in
hardware. The sharing is determined in the cpu starting callback, but the
prepare callback allocates per cpu data for the upcoming cpu because
potential sharing is unknown at this point. If the starting callback finds
an online cpu which shares the hardware resource it takes a refcount on the
percpu data of that cpu and puts its own data structure into a
'free_at_online' pointer of that shared data structure. The online callback
frees that.
With the old model this worked because in a starting callback only one non
unused structure (the one of the starting cpu) was available. The new code
allocates the data structures for all cpus when the prepare callback is
registered.
Now the starting function iterates through all online cpus and looks for a
data structure (skipping its own) which has a matching hardware id. The id
member of the data structure is initialized to 0, but the hardware id can
be 0 as well. The resulting wreckage is:
CPU0 finds a matching id on CPU1, takes a refcount on CPU1 data and puts
its own data structure into CPU1s data structure to be freed.
CPU1 skips CPU0 because the data structure is its allegedly unused own.
It finds a matching id on CPU2, takes a refcount on CPU2 data and puts
its own data structure into CPU2s data structure to be freed.
....
Now the online callbacks are invoked.
CPU0 has a pointer to CPU1s data and frees the original CPU0 data. So
far so good.
CPU1 has a pointer to CPU2s data and frees the original CPU1 data, which
is still referenced by CPU0 ---> Booom
So there are two issues to be solved here:
1) The id field must be initialized at allocation time to a value which
cannot be a valid hardware id, i.e. -1
This prevents the above scenario, but now CPU1 and CPU2 both stick their
own data structure into the free_at_online pointer of CPU0. So we leak
CPU1s data structure.
2) Fix the memory leak described in #1
Instead of having a single pointer, use a hlist to enqueue the
superfluous data structures which are then freed by the first cpu
invoking the online callback.
Ideally we should know the sharing _before_ invoking the prepare callback,
but that's way beyond the scope of this bug fix.
[ tglx: Rewrote changelog ]
Fixes: 96b2bd3866a0 ("perf/x86/amd/uncore: Convert to hotplug state machine")
Reported-and-tested-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20160909160822.lowgmkdwms2dheyv@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-09-10 00:08:23 +08:00
|
|
|
static void uncore_clean_online(void)
|
|
|
|
{
|
|
|
|
struct amd_uncore *uncore;
|
|
|
|
struct hlist_node *n;
|
|
|
|
|
|
|
|
hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
|
|
|
|
hlist_del(&uncore->node);
|
|
|
|
kfree(uncore);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
x86: delete __cpuinit usage from all x86 files
The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications. For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.
After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out. Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.
Note that some harmless section mismatch warnings may result, since
notify_cpu_starting() and cpu_up(), which are arch independent (kernel/cpu.c),
are flagged as __cpuinit -- so if we remove the __cpuinit from
arch specific callers, we will also get section mismatch warnings.
As an intermediate step, we intend to turn the linux/init.h cpuinit
content into no-ops as early as possible, since that will get rid
of these warnings. In any case, they are temporary and harmless.
This removes all the arch/x86 uses of the __cpuinit macros from
all C files. x86 only had the one __CPUINIT used in assembly files,
and it wasn't paired off with a .previous or a __FINIT, so we can
delete it directly w/o any corresponding additional change there.
[1] https://lkml.org/lkml/2013/5/20/589
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2013-06-19 06:23:59 +08:00
|
|
|
static void uncore_online(unsigned int cpu,
|
|
|
|
struct amd_uncore * __percpu *uncores)
|
2013-04-20 05:34:28 +08:00
|
|
|
{
|
|
|
|
struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
|
|
|
|
|
perf/x86/amd/uncore: Prevent use after free
The resent conversion of the cpu hotplug support in the uncore driver
introduced a regression due to the way the callbacks are invoked at
initialization time.
The old code called the prepare/starting/online function on each online cpu
as a block. The new code registers the hotplug callbacks in the core for
each state. The core invokes the callbacks at each registration on all
online cpus.
The code implicitely relied on the prepare/starting/online callbacks being
called as combo on a particular cpu, which was not obvious and completely
undocumented.
The resulting subtle wreckage happens due to the way how the uncore code
manages shared data structures for cpus which share an uncore resource in
hardware. The sharing is determined in the cpu starting callback, but the
prepare callback allocates per cpu data for the upcoming cpu because
potential sharing is unknown at this point. If the starting callback finds
a online cpu which shares the hardware resource it takes a refcount on the
percpu data of that cpu and puts the own data structure into a
'free_at_online' pointer of that shared data structure. The online callback
frees that.
With the old model this worked because in a starting callback only one non
unused structure (the one of the starting cpu) was available. The new code
allocates the data structures for all cpus when the prepare callback is
registered.
Now the starting function iterates through all online cpus and looks for a
data structure (skipping its own) which has a matching hardware id. The id
member of the data structure is initialized to 0, but the hardware id can
be 0 as well. The resulting wreckage is:
CPU0 finds a matching id on CPU1, takes a refcount on CPU1 data and puts
its own data structure into CPU1s data structure to be freed.
CPU1 skips CPU0 because the data structure is its allegedly unsued own.
It finds a matching id on CPU2, takes a refcount on CPU1 data and puts
its own data structure into CPU2s data structure to be freed.
....
Now the online callbacks are invoked.
CPU0 has a pointer to CPU1s data and frees the original CPU0 data. So
far so good.
CPU1 has a pointer to CPU2s data and frees the original CPU1 data, which
is still referenced by CPU0 ---> Booom
So there are two issues to be solved here:
1) The id field must be initialized at allocation time to a value which
cannot be a valid hardware id, i.e. -1
This prevents the above scenario, but now CPU1 and CPU2 both stick their
own data structure into the free_at_online pointer of CPU0. So we leak
CPU1s data structure.
2) Fix the memory leak described in #1
Instead of having a single pointer, use a hlist to enqueue the
superflous data structures which are then freed by the first cpu
invoking the online callback.
Ideally we should know the sharing _before_ invoking the prepare callback,
but that's way beyond the scope of this bug fix.
[ tglx: Rewrote changelog ]
Fixes: 96b2bd3866a0 ("perf/x86/amd/uncore: Convert to hotplug state machine")
Reported-and-tested-by: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/20160909160822.lowgmkdwms2dheyv@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-09-10 00:08:23 +08:00
|
|
|
uncore_clean_online();
|
2013-04-20 05:34:28 +08:00
|
|
|
|
|
|
|
if (cpu == uncore->cpu)
|
|
|
|
cpumask_set_cpu(cpu, uncore->active_mask);
|
|
|
|
}
|
|
|
|
|
2016-07-14 01:16:13 +08:00
|
|
|
static int amd_uncore_cpu_online(unsigned int cpu)
|
2013-04-20 05:34:28 +08:00
|
|
|
{
|
|
|
|
if (amd_uncore_nb)
|
|
|
|
uncore_online(cpu, amd_uncore_nb);
|
|
|
|
|
2017-01-17 07:36:21 +08:00
|
|
|
if (amd_uncore_llc)
|
|
|
|
uncore_online(cpu, amd_uncore_llc);
|
2016-07-14 01:16:13 +08:00
|
|
|
|
|
|
|
return 0;
|
2013-04-20 05:34:28 +08:00
|
|
|
}
|
|
|
|
|
x86: delete __cpuinit usage from all x86 files
The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications. For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.
After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out. Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.
Note that some harmless section mismatch warnings may result, since
notify_cpu_starting() and cpu_up(), which are arch independent (kernel/cpu.c),
are flagged as __cpuinit -- so if we remove the __cpuinit from
arch specific callers, we will also get section mismatch warnings.
As an intermediate step, we intend to turn the linux/init.h cpuinit
content into no-ops as early as possible, since that will get rid
of these warnings. In any case, they are temporary and harmless.
This removes all the arch/x86 uses of the __cpuinit macros from
all C files. x86 only had the one __CPUINIT used in assembly files,
and it wasn't paired off with a .previous or a __FINIT, so we can
delete it directly w/o any corresponding additional change there.
[1] https://lkml.org/lkml/2013/5/20/589
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2013-06-19 06:23:59 +08:00
|
|
|
static void uncore_down_prepare(unsigned int cpu,
|
|
|
|
struct amd_uncore * __percpu *uncores)
|
2013-04-20 05:34:28 +08:00
|
|
|
{
|
|
|
|
unsigned int i;
|
|
|
|
struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
|
|
|
|
|
|
|
|
if (this->cpu != cpu)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* this cpu is going down, migrate to a shared sibling if possible */
|
|
|
|
for_each_online_cpu(i) {
|
|
|
|
struct amd_uncore *that = *per_cpu_ptr(uncores, i);
|
|
|
|
|
|
|
|
if (cpu == i)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (this == that) {
|
|
|
|
perf_pmu_migrate_context(this->pmu, cpu, i);
|
|
|
|
cpumask_clear_cpu(cpu, that->active_mask);
|
|
|
|
cpumask_set_cpu(i, that->active_mask);
|
|
|
|
that->cpu = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-14 01:16:13 +08:00
|
|
|
static int amd_uncore_cpu_down_prepare(unsigned int cpu)
|
2013-04-20 05:34:28 +08:00
|
|
|
{
|
|
|
|
if (amd_uncore_nb)
|
|
|
|
uncore_down_prepare(cpu, amd_uncore_nb);
|
|
|
|
|
2017-01-17 07:36:21 +08:00
|
|
|
if (amd_uncore_llc)
|
|
|
|
uncore_down_prepare(cpu, amd_uncore_llc);
|
2016-07-14 01:16:13 +08:00
|
|
|
|
|
|
|
return 0;
|
2013-04-20 05:34:28 +08:00
|
|
|
}
|
|
|
|
|
x86: delete __cpuinit usage from all x86 files
The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications. For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.
After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out. Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.
Note that some harmless section mismatch warnings may result, since
notify_cpu_starting() and cpu_up(), which are arch independent (kernel/cpu.c),
are flagged as __cpuinit -- so if we remove the __cpuinit from
arch specific callers, we will also get section mismatch warnings.
As an intermediate step, we intend to turn the linux/init.h cpuinit
content into no-ops as early as possible, since that will get rid
of these warnings. In any case, they are temporary and harmless.
This removes all the arch/x86 uses of the __cpuinit macros from
all C files. x86 only had the one __CPUINIT used in assembly files,
and it wasn't paired off with a .previous or a __FINIT, so we can
delete it directly w/o any corresponding additional change there.
[1] https://lkml.org/lkml/2013/5/20/589
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2013-06-19 06:23:59 +08:00
|
|
|
/*
 * Per-PMU part of the dead step for @cpu.
 *
 * @cpu:     cpu that has gone offline
 * @uncores: per-cpu table of amd_uncore pointers for one PMU
 *
 * Clears @cpu from the active_mask if it is still the cpu recorded in
 * ->cpu, drops one reference on the structure (freeing it when the count
 * reaches zero) and resets this cpu's table entry to NULL.
 */
static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
{
	struct amd_uncore **slot = per_cpu_ptr(uncores, cpu);
	struct amd_uncore *uncore = *slot;

	if (uncore->cpu == cpu)
		cpumask_clear_cpu(cpu, uncore->active_mask);

	/* Last user gone: release the structure. */
	uncore->refcnt--;
	if (uncore->refcnt == 0)
		kfree(uncore);

	*slot = NULL;
}
|
|
|
|
|
2016-07-14 01:16:13 +08:00
|
|
|
static int amd_uncore_cpu_dead(unsigned int cpu)
|
2013-04-20 05:34:28 +08:00
|
|
|
{
|
|
|
|
if (amd_uncore_nb)
|
|
|
|
uncore_dead(cpu, amd_uncore_nb);
|
|
|
|
|
2017-01-17 07:36:21 +08:00
|
|
|
if (amd_uncore_llc)
|
|
|
|
uncore_dead(cpu, amd_uncore_llc);
|
2013-04-20 05:34:28 +08:00
|
|
|
|
2016-07-14 01:16:13 +08:00
|
|
|
return 0;
|
2014-06-11 12:09:03 +08:00
|
|
|
}
|
|
|
|
|
2013-04-20 05:34:28 +08:00
|
|
|
/*
 * Driver entry point.
 *
 * Bails out with -ENODEV unless the boot cpu is an AMD or Hygon part with
 * X86_FEATURE_TOPOEXT. Otherwise it picks counter counts, PMU names and
 * event format attributes by cpu family, registers the NB and LLC PMUs for
 * which the corresponding PERFCTR feature bit is set, and finally installs
 * the three CPU hotplug states.
 *
 * Returns 0 on success or a negative errno. On any failure everything that
 * was set up before the failing step is undone: hotplug states are removed,
 * registered PMUs are unregistered and the per-cpu tables are freed.
 */
static int __init amd_uncore_init(void)
{
	int ret = -ENODEV;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
		return -ENODEV;

	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
		return -ENODEV;

	if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
		/*
		 * For F17h or F18h, the Northbridge counters are
		 * repurposed as Data Fabric counters. Also, L3
		 * counters are supported too. The PMUs are exported
		 * based on family as either L2 or L3 and NB or DF.
		 */
		num_counters_nb		  = NUM_COUNTERS_NB;
		num_counters_llc	  = NUM_COUNTERS_L3;
		amd_nb_pmu.name		  = "amd_df";
		amd_llc_pmu.name	  = "amd_l3";
		format_attr_event_df.show = &event_show_df;
		format_attr_event_l3.show = &event_show_l3;
		l3_mask			  = true;
	} else {
		num_counters_nb		  = NUM_COUNTERS_NB;
		num_counters_llc	  = NUM_COUNTERS_L2;
		amd_nb_pmu.name		  = "amd_nb";
		amd_llc_pmu.name	  = "amd_l2";
		format_attr_event_df	  = format_attr_event;
		format_attr_event_l3	  = format_attr_event;
		l3_mask			  = false;
	}

	amd_nb_pmu.attr_groups	= amd_uncore_attr_groups_df;
	amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;

	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
		if (!amd_uncore_nb) {
			ret = -ENOMEM;
			goto fail_nb;
		}
		ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
		if (ret)
			goto fail_nb;

		pr_info("%s NB counters detected\n",
			boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?
				"HYGON" : "AMD");
		ret = 0;
	}

	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
		amd_uncore_llc = alloc_percpu(struct amd_uncore *);
		if (!amd_uncore_llc) {
			ret = -ENOMEM;
			goto fail_llc;
		}
		ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1);
		if (ret)
			goto fail_llc;

		pr_info("%s LLC counters detected\n",
			boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?
				"HYGON" : "AMD");
		ret = 0;
	}

	/*
	 * Install callbacks. Core will call them for each online cpu.
	 *
	 * The result of each registration is kept in ret so that a failure
	 * here is reported to the caller instead of being masked by the 0
	 * left over from a successful PMU registration above.
	 */
	ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
				"perf/x86/amd/uncore:prepare",
				amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead);
	if (ret)
		goto fail_pmu;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
				"perf/x86/amd/uncore:starting",
				amd_uncore_cpu_starting, NULL);
	if (ret)
		goto fail_prep;

	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
				"perf/x86/amd/uncore:online",
				amd_uncore_cpu_online,
				amd_uncore_cpu_down_prepare);
	if (ret)
		goto fail_start;

	return 0;

fail_start:
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail_pmu:
	/*
	 * Only reached after both PMU sections completed, so the LLC PMU is
	 * registered whenever the feature is present. It must go away before
	 * its per-cpu table is freed below.
	 */
	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
		perf_pmu_unregister(&amd_llc_pmu);
fail_llc:
	/* The LLC PMU is not registered on this path; the NB PMU may be. */
	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
		perf_pmu_unregister(&amd_nb_pmu);
	if (amd_uncore_llc)
		free_percpu(amd_uncore_llc);
fail_nb:
	if (amd_uncore_nb)
		free_percpu(amd_uncore_nb);

	return ret;
}
device_initcall(amd_uncore_init);
|