Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "The main changes in this cycle were: Kernel side changes: - Kprobes and uprobes changes: - Make their trampolines read-only while they are used - Make UPROBES_EVENTS default-y which is the distro practice - Apply misc fixes and robustization to probe point insertion. - add support for AMD IOMMU events - extend hw events on Intel Goldmont CPUs - ... plus misc fixes and updates. Tooling side changes: - support s390 jump instructions in perf annotate (Christian Borntraeger) - vendor hardware events updates (Andi Kleen) - add argument support for SDT events in powerpc (Ravi Bangoria) - beautify the statx syscall arguments in 'perf trace' (Arnaldo Carvalho de Melo) - handle inline functions in callchains (Jin Yao) - enable sorting by srcline as key (Milian Wolff) - add 'brstackinsn' field in 'perf script' to reuse the x86 instruction decoder used in the Intel PT code to study hot paths to samples (Andi Kleen) - add PERF_RECORD_NAMESPACES so that the kernel can record information required to associate samples to namespaces, helping in container problem characterization. (Hari Bathini) - allow sorting by symbol_size in 'perf report' and 'perf top' (Charles Baylis) - in perf stat, make system wide (-a) the default option if no target was specified and one of following conditions is met: - no workload specified (current behaviour) - a workload is specified but all requested events are system wide ones, like uncore ones. (Jiri Olsa) - ... plus lots of other updates, enhancements, cleanups and fixes" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (235 commits) perf tools: Fix the code to strip command name tools arch x86: Sync cpufeatures.h tools arch: Sync arch/x86/lib/memcpy_64.S with the kernel tools: Update asm-generic/mman-common.h copy from the kernel perf tools: Use just forward declarations for struct thread where possible perf tools: Add the right header to obtain PERF_ALIGN() perf tools: Remove poll.h and wait.h from util.h perf tools: Remove string.h, unistd.h and sys/stat.h from util.h perf tools: Remove stale prototypes from builtin.h perf tools: Remove string.h from util.h perf tools: Remove sys/ioctl.h from util.h perf tools: Remove a few more needless includes from util.h perf tools: Include sys/param.h where needed perf callchain: Move callchain specific routines from util.[ch] perf tools: Add compress.h for the *_decompress_to_file() headers perf mem: Fix display of data source snoop indication perf debug: Move dump_stack() and sighandler_dump_stack() to debug.h perf kvm: Make function only used by 'perf kvm' static perf tools: Move timestamp routines from util.h to time-utils.h perf tools: Move units conversion/formatting routines to separate object ...
This commit is contained in:
commit
7c8c03bfc7
|
@ -8,8 +8,9 @@ Overview
|
|||
--------
|
||||
These events are similar to tracepoint based events. Instead of Tracepoint,
|
||||
this is based on kprobes (kprobe and kretprobe). So it can probe wherever
|
||||
kprobes can probe (this means, all functions body except for __kprobes
|
||||
functions). Unlike the Tracepoint based event, this can be added and removed
|
||||
kprobes can probe (this means, all functions except those with
|
||||
__kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL).
|
||||
Unlike the Tracepoint based event, this can be added and removed
|
||||
dynamically, on the fly.
|
||||
|
||||
To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "perf/amd_iommu: " fmt
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/cpumask.h>
|
||||
|
@ -21,44 +23,42 @@
|
|||
|
||||
#define COUNTER_SHIFT 16
|
||||
|
||||
#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
|
||||
#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
|
||||
/* iommu pmu conf masks */
|
||||
#define GET_CSOURCE(x) ((x)->conf & 0xFFULL)
|
||||
#define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL)
|
||||
#define GET_DOMID(x) (((x)->conf >> 24) & 0xFFFFULL)
|
||||
#define GET_PASID(x) (((x)->conf >> 40) & 0xFFFFFULL)
|
||||
|
||||
/* iommu pmu config masks */
|
||||
#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
|
||||
#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
|
||||
#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
|
||||
#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
|
||||
#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
|
||||
#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
|
||||
#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
|
||||
/* iommu pmu conf1 masks */
|
||||
#define GET_DEVID_MASK(x) ((x)->conf1 & 0xFFFFULL)
|
||||
#define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL)
|
||||
#define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL)
|
||||
|
||||
static struct perf_amd_iommu __perf_iommu;
|
||||
#define IOMMU_NAME_SIZE 16
|
||||
|
||||
struct perf_amd_iommu {
|
||||
struct list_head list;
|
||||
struct pmu pmu;
|
||||
struct amd_iommu *iommu;
|
||||
char name[IOMMU_NAME_SIZE];
|
||||
u8 max_banks;
|
||||
u8 max_counters;
|
||||
u64 cntr_assign_mask;
|
||||
raw_spinlock_t lock;
|
||||
const struct attribute_group *attr_groups[4];
|
||||
};
|
||||
|
||||
#define format_group attr_groups[0]
|
||||
#define cpumask_group attr_groups[1]
|
||||
#define events_group attr_groups[2]
|
||||
#define null_group attr_groups[3]
|
||||
static LIST_HEAD(perf_amd_iommu_list);
|
||||
|
||||
/*---------------------------------------------
|
||||
* sysfs format attributes
|
||||
*---------------------------------------------*/
|
||||
PMU_FORMAT_ATTR(csource, "config:0-7");
|
||||
PMU_FORMAT_ATTR(devid, "config:8-23");
|
||||
PMU_FORMAT_ATTR(pasid, "config:24-39");
|
||||
PMU_FORMAT_ATTR(domid, "config:40-55");
|
||||
PMU_FORMAT_ATTR(domid, "config:24-39");
|
||||
PMU_FORMAT_ATTR(pasid, "config:40-59");
|
||||
PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
|
||||
PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
|
||||
PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
|
||||
PMU_FORMAT_ATTR(domid_mask, "config1:16-31");
|
||||
PMU_FORMAT_ATTR(pasid_mask, "config1:32-51");
|
||||
|
||||
static struct attribute *iommu_format_attrs[] = {
|
||||
&format_attr_csource.attr,
|
||||
|
@ -79,6 +79,10 @@ static struct attribute_group amd_iommu_format_group = {
|
|||
/*---------------------------------------------
|
||||
* sysfs events attributes
|
||||
*---------------------------------------------*/
|
||||
static struct attribute_group amd_iommu_events_group = {
|
||||
.name = "events",
|
||||
};
|
||||
|
||||
struct amd_iommu_event_desc {
|
||||
struct kobj_attribute attr;
|
||||
const char *event;
|
||||
|
@ -150,30 +154,34 @@ static struct attribute_group amd_iommu_cpumask_group = {
|
|||
|
||||
/*---------------------------------------------*/
|
||||
|
||||
static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
|
||||
static int get_next_avail_iommu_bnk_cntr(struct perf_event *event)
|
||||
{
|
||||
struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu);
|
||||
int max_cntrs = piommu->max_counters;
|
||||
int max_banks = piommu->max_banks;
|
||||
u32 shift, bank, cntr;
|
||||
unsigned long flags;
|
||||
int shift, bank, cntr, retval;
|
||||
int max_banks = perf_iommu->max_banks;
|
||||
int max_cntrs = perf_iommu->max_counters;
|
||||
int retval;
|
||||
|
||||
raw_spin_lock_irqsave(&perf_iommu->lock, flags);
|
||||
raw_spin_lock_irqsave(&piommu->lock, flags);
|
||||
|
||||
for (bank = 0, shift = 0; bank < max_banks; bank++) {
|
||||
for (cntr = 0; cntr < max_cntrs; cntr++) {
|
||||
shift = bank + (bank*3) + cntr;
|
||||
if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
|
||||
if (piommu->cntr_assign_mask & BIT_ULL(shift)) {
|
||||
continue;
|
||||
} else {
|
||||
perf_iommu->cntr_assign_mask |= (1ULL<<shift);
|
||||
retval = ((u16)((u16)bank<<8) | (u8)(cntr));
|
||||
piommu->cntr_assign_mask |= BIT_ULL(shift);
|
||||
event->hw.iommu_bank = bank;
|
||||
event->hw.iommu_cntr = cntr;
|
||||
retval = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
retval = -ENOSPC;
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
|
||||
raw_spin_unlock_irqrestore(&piommu->lock, flags);
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -202,8 +210,6 @@ static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
|
|||
static int perf_iommu_event_init(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_amd_iommu *perf_iommu;
|
||||
u64 config, config1;
|
||||
|
||||
/* test the event attr type check for PMU enumeration */
|
||||
if (event->attr.type != event->pmu->type)
|
||||
|
@ -225,80 +231,62 @@ static int perf_iommu_event_init(struct perf_event *event)
|
|||
if (event->cpu < 0)
|
||||
return -EINVAL;
|
||||
|
||||
perf_iommu = &__perf_iommu;
|
||||
|
||||
if (event->pmu != &perf_iommu->pmu)
|
||||
return -ENOENT;
|
||||
|
||||
if (perf_iommu) {
|
||||
config = event->attr.config;
|
||||
config1 = event->attr.config1;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* integrate with iommu base devid (0000), assume one iommu */
|
||||
perf_iommu->max_banks =
|
||||
amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
|
||||
perf_iommu->max_counters =
|
||||
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
|
||||
if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
|
||||
return -EINVAL;
|
||||
|
||||
/* update the hw_perf_event struct with the iommu config data */
|
||||
hwc->config = config;
|
||||
hwc->extra_reg.config = config1;
|
||||
hwc->conf = event->attr.config;
|
||||
hwc->conf1 = event->attr.config1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev)
|
||||
{
|
||||
return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu;
|
||||
}
|
||||
|
||||
static void perf_iommu_enable_event(struct perf_event *ev)
|
||||
{
|
||||
u8 csource = _GET_CSOURCE(ev);
|
||||
u16 devid = _GET_DEVID(ev);
|
||||
struct amd_iommu *iommu = perf_event_2_iommu(ev);
|
||||
struct hw_perf_event *hwc = &ev->hw;
|
||||
u8 bank = hwc->iommu_bank;
|
||||
u8 cntr = hwc->iommu_cntr;
|
||||
u64 reg = 0ULL;
|
||||
|
||||
reg = csource;
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_COUNTER_SRC_REG, ®, true);
|
||||
reg = GET_CSOURCE(hwc);
|
||||
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, ®);
|
||||
|
||||
reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
|
||||
reg = GET_DEVID_MASK(hwc);
|
||||
reg = GET_DEVID(hwc) | (reg << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_DEVID_MATCH_REG, ®, true);
|
||||
reg |= BIT(31);
|
||||
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, ®);
|
||||
|
||||
reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
|
||||
reg = GET_PASID_MASK(hwc);
|
||||
reg = GET_PASID(hwc) | (reg << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_PASID_MATCH_REG, ®, true);
|
||||
reg |= BIT(31);
|
||||
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, ®);
|
||||
|
||||
reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
|
||||
reg = GET_DOMID_MASK(hwc);
|
||||
reg = GET_DOMID(hwc) | (reg << 32);
|
||||
if (reg)
|
||||
reg |= (1UL << 31);
|
||||
amd_iommu_pc_get_set_reg_val(devid,
|
||||
_GET_BANK(ev), _GET_CNTR(ev) ,
|
||||
IOMMU_PC_DOMID_MATCH_REG, ®, true);
|
||||
reg |= BIT(31);
|
||||
amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, ®);
|
||||
}
|
||||
|
||||
static void perf_iommu_disable_event(struct perf_event *event)
|
||||
{
|
||||
struct amd_iommu *iommu = perf_event_2_iommu(event);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 reg = 0ULL;
|
||||
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_SRC_REG, ®, true);
|
||||
amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
|
||||
IOMMU_PC_COUNTER_SRC_REG, ®);
|
||||
}
|
||||
|
||||
static void perf_iommu_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_start\n");
|
||||
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
||||
|
@ -306,10 +294,11 @@ static void perf_iommu_start(struct perf_event *event, int flags)
|
|||
hwc->state = 0;
|
||||
|
||||
if (flags & PERF_EF_RELOAD) {
|
||||
u64 prev_raw_count = local64_read(&hwc->prev_count);
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
|
||||
u64 prev_raw_count = local64_read(&hwc->prev_count);
|
||||
struct amd_iommu *iommu = perf_event_2_iommu(event);
|
||||
|
||||
amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
|
||||
IOMMU_PC_COUNTER_REG, &prev_raw_count);
|
||||
}
|
||||
|
||||
perf_iommu_enable_event(event);
|
||||
|
@ -319,37 +308,30 @@ static void perf_iommu_start(struct perf_event *event, int flags)
|
|||
|
||||
static void perf_iommu_read(struct perf_event *event)
|
||||
{
|
||||
u64 count = 0ULL;
|
||||
u64 prev_raw_count = 0ULL;
|
||||
u64 delta = 0ULL;
|
||||
u64 count, prev, delta;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
pr_debug("perf: amd_iommu:perf_iommu_read\n");
|
||||
struct amd_iommu *iommu = perf_event_2_iommu(event);
|
||||
|
||||
amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
|
||||
_GET_BANK(event), _GET_CNTR(event),
|
||||
IOMMU_PC_COUNTER_REG, &count, false);
|
||||
|
||||
/* IOMMU pc counter register is only 48 bits */
|
||||
count &= 0xFFFFFFFFFFFFULL;
|
||||
|
||||
prev_raw_count = local64_read(&hwc->prev_count);
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
||||
count) != prev_raw_count)
|
||||
if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
|
||||
IOMMU_PC_COUNTER_REG, &count))
|
||||
return;
|
||||
|
||||
/* Handling 48-bit counter overflowing */
|
||||
delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
|
||||
/* IOMMU pc counter register is only 48 bits */
|
||||
count &= GENMASK_ULL(47, 0);
|
||||
|
||||
prev = local64_read(&hwc->prev_count);
|
||||
if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev)
|
||||
return;
|
||||
|
||||
/* Handle 48-bit counter overflow */
|
||||
delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
|
||||
delta >>= COUNTER_SHIFT;
|
||||
local64_add(delta, &event->count);
|
||||
|
||||
}
|
||||
|
||||
static void perf_iommu_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 config;
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_stop\n");
|
||||
|
||||
if (hwc->state & PERF_HES_UPTODATE)
|
||||
return;
|
||||
|
@ -361,7 +343,6 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
|
|||
if (hwc->state & PERF_HES_UPTODATE)
|
||||
return;
|
||||
|
||||
config = hwc->config;
|
||||
perf_iommu_read(event);
|
||||
hwc->state |= PERF_HES_UPTODATE;
|
||||
}
|
||||
|
@ -369,17 +350,12 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
|
|||
static int perf_iommu_add(struct perf_event *event, int flags)
|
||||
{
|
||||
int retval;
|
||||
struct perf_amd_iommu *perf_iommu =
|
||||
container_of(event->pmu, struct perf_amd_iommu, pmu);
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_add\n");
|
||||
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
|
||||
|
||||
/* request an iommu bank/counter */
|
||||
retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
|
||||
if (retval != -ENOSPC)
|
||||
event->hw.extra_reg.reg = (u16)retval;
|
||||
else
|
||||
retval = get_next_avail_iommu_bnk_cntr(event);
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
if (flags & PERF_EF_START)
|
||||
|
@ -390,115 +366,124 @@ static int perf_iommu_add(struct perf_event *event, int flags)
|
|||
|
||||
static void perf_iommu_del(struct perf_event *event, int flags)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_amd_iommu *perf_iommu =
|
||||
container_of(event->pmu, struct perf_amd_iommu, pmu);
|
||||
|
||||
pr_debug("perf: amd_iommu:perf_iommu_del\n");
|
||||
perf_iommu_stop(event, PERF_EF_UPDATE);
|
||||
|
||||
/* clear the assigned iommu bank/counter */
|
||||
clear_avail_iommu_bnk_cntr(perf_iommu,
|
||||
_GET_BANK(event),
|
||||
_GET_CNTR(event));
|
||||
hwc->iommu_bank, hwc->iommu_cntr);
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
||||
static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
|
||||
static __init int _init_events_attrs(void)
|
||||
{
|
||||
struct attribute **attrs;
|
||||
struct attribute_group *attr_group;
|
||||
int i = 0, j;
|
||||
struct attribute **attrs;
|
||||
|
||||
while (amd_iommu_v2_event_descs[i].attr.attr.name)
|
||||
i++;
|
||||
|
||||
attr_group = kzalloc(sizeof(struct attribute *)
|
||||
* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
|
||||
if (!attr_group)
|
||||
attrs = kzalloc(sizeof(struct attribute **) * (i + 1), GFP_KERNEL);
|
||||
if (!attrs)
|
||||
return -ENOMEM;
|
||||
|
||||
attrs = (struct attribute **)(attr_group + 1);
|
||||
for (j = 0; j < i; j++)
|
||||
attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
|
||||
|
||||
attr_group->name = "events";
|
||||
attr_group->attrs = attrs;
|
||||
perf_iommu->events_group = attr_group;
|
||||
|
||||
amd_iommu_events_group.attrs = attrs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init void amd_iommu_pc_exit(void)
|
||||
{
|
||||
if (__perf_iommu.events_group != NULL) {
|
||||
kfree(__perf_iommu.events_group);
|
||||
__perf_iommu.events_group = NULL;
|
||||
}
|
||||
}
|
||||
const struct attribute_group *amd_iommu_attr_groups[] = {
|
||||
&amd_iommu_format_group,
|
||||
&amd_iommu_cpumask_group,
|
||||
&amd_iommu_events_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static __init int _init_perf_amd_iommu(
|
||||
struct perf_amd_iommu *perf_iommu, char *name)
|
||||
static struct pmu iommu_pmu = {
|
||||
.event_init = perf_iommu_event_init,
|
||||
.add = perf_iommu_add,
|
||||
.del = perf_iommu_del,
|
||||
.start = perf_iommu_start,
|
||||
.stop = perf_iommu_stop,
|
||||
.read = perf_iommu_read,
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.attr_groups = amd_iommu_attr_groups,
|
||||
};
|
||||
|
||||
static __init int init_one_iommu(unsigned int idx)
|
||||
{
|
||||
struct perf_amd_iommu *perf_iommu;
|
||||
int ret;
|
||||
|
||||
perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL);
|
||||
if (!perf_iommu)
|
||||
return -ENOMEM;
|
||||
|
||||
raw_spin_lock_init(&perf_iommu->lock);
|
||||
|
||||
/* Init format attributes */
|
||||
perf_iommu->format_group = &amd_iommu_format_group;
|
||||
perf_iommu->pmu = iommu_pmu;
|
||||
perf_iommu->iommu = get_amd_iommu(idx);
|
||||
perf_iommu->max_banks = amd_iommu_pc_get_max_banks(idx);
|
||||
perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx);
|
||||
|
||||
/* Init cpumask attributes to only core 0 */
|
||||
cpumask_set_cpu(0, &iommu_cpumask);
|
||||
perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
|
||||
|
||||
/* Init events attributes */
|
||||
if (_init_events_attrs(perf_iommu) != 0)
|
||||
pr_err("perf: amd_iommu: Only support raw events.\n");
|
||||
|
||||
/* Init null attributes */
|
||||
perf_iommu->null_group = NULL;
|
||||
perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
|
||||
|
||||
ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
|
||||
if (ret) {
|
||||
pr_err("perf: amd_iommu: Failed to initialized.\n");
|
||||
amd_iommu_pc_exit();
|
||||
} else {
|
||||
pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
|
||||
amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
|
||||
amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
|
||||
if (!perf_iommu->iommu ||
|
||||
!perf_iommu->max_banks ||
|
||||
!perf_iommu->max_counters) {
|
||||
kfree(perf_iommu);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx);
|
||||
|
||||
ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1);
|
||||
if (!ret) {
|
||||
pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n",
|
||||
idx, perf_iommu->max_banks, perf_iommu->max_counters);
|
||||
list_add_tail(&perf_iommu->list, &perf_amd_iommu_list);
|
||||
} else {
|
||||
pr_warn("Error initializing IOMMU %d.\n", idx);
|
||||
kfree(perf_iommu);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct perf_amd_iommu __perf_iommu = {
|
||||
.pmu = {
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.event_init = perf_iommu_event_init,
|
||||
.add = perf_iommu_add,
|
||||
.del = perf_iommu_del,
|
||||
.start = perf_iommu_start,
|
||||
.stop = perf_iommu_stop,
|
||||
.read = perf_iommu_read,
|
||||
},
|
||||
.max_banks = 0x00,
|
||||
.max_counters = 0x00,
|
||||
.cntr_assign_mask = 0ULL,
|
||||
.format_group = NULL,
|
||||
.cpumask_group = NULL,
|
||||
.events_group = NULL,
|
||||
.null_group = NULL,
|
||||
};
|
||||
|
||||
static __init int amd_iommu_pc_init(void)
|
||||
{
|
||||
unsigned int i, cnt = 0;
|
||||
int ret;
|
||||
|
||||
/* Make sure the IOMMU PC resource is available */
|
||||
if (!amd_iommu_pc_supported())
|
||||
return -ENODEV;
|
||||
|
||||
_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
|
||||
ret = _init_events_attrs();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* An IOMMU PMU is specific to an IOMMU, and can function independently.
|
||||
* So we go through all IOMMUs and ignore the one that fails init
|
||||
* unless all IOMMU are failing.
|
||||
*/
|
||||
for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
|
||||
ret = init_one_iommu(i);
|
||||
if (!ret)
|
||||
cnt++;
|
||||
}
|
||||
|
||||
if (!cnt) {
|
||||
kfree(amd_iommu_events_group.attrs);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/* Init cpumask attributes to only core 0 */
|
||||
cpumask_set_cpu(0, &iommu_cpumask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -24,17 +24,23 @@
|
|||
#define PC_MAX_SPEC_BNKS 64
|
||||
#define PC_MAX_SPEC_CNTRS 16
|
||||
|
||||
/* iommu pc reg masks*/
|
||||
#define IOMMU_BASE_DEVID 0x0000
|
||||
struct amd_iommu;
|
||||
|
||||
/* amd_iommu_init.c external support functions */
|
||||
extern int amd_iommu_get_num_iommus(void);
|
||||
|
||||
extern bool amd_iommu_pc_supported(void);
|
||||
|
||||
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
|
||||
extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
|
||||
|
||||
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
|
||||
extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
|
||||
|
||||
extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value, bool is_write);
|
||||
extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value);
|
||||
|
||||
extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value);
|
||||
|
||||
extern struct amd_iommu *get_amd_iommu(int idx);
|
||||
|
||||
#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
|
||||
|
|
|
@ -30,6 +30,9 @@
|
|||
|
||||
#define COUNTER_SHIFT 16
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "amd_uncore: " fmt
|
||||
|
||||
static int num_counters_llc;
|
||||
static int num_counters_nb;
|
||||
|
||||
|
@ -509,51 +512,34 @@ static int __init amd_uncore_init(void)
|
|||
int ret = -ENODEV;
|
||||
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
|
||||
goto fail_nodev;
|
||||
|
||||
switch(boot_cpu_data.x86) {
|
||||
case 23:
|
||||
/* Family 17h: */
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L3;
|
||||
/*
|
||||
* For Family17h, the NorthBridge counters are
|
||||
* re-purposed as Data Fabric counters. Also, support is
|
||||
* added for L3 counters. The pmus are exported based on
|
||||
* family as either L2 or L3 and NB or DF.
|
||||
*/
|
||||
amd_nb_pmu.name = "amd_df";
|
||||
amd_llc_pmu.name = "amd_l3";
|
||||
format_attr_event_df.show = &event_show_df;
|
||||
format_attr_event_l3.show = &event_show_l3;
|
||||
break;
|
||||
case 22:
|
||||
/* Family 16h - may change: */
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L2;
|
||||
amd_nb_pmu.name = "amd_nb";
|
||||
amd_llc_pmu.name = "amd_l2";
|
||||
format_attr_event_df = format_attr_event;
|
||||
format_attr_event_l3 = format_attr_event;
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* All prior families have the same number of
|
||||
* NorthBridge and Last Level Cache counters
|
||||
*/
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L2;
|
||||
amd_nb_pmu.name = "amd_nb";
|
||||
amd_llc_pmu.name = "amd_l2";
|
||||
format_attr_event_df = format_attr_event;
|
||||
format_attr_event_l3 = format_attr_event;
|
||||
break;
|
||||
}
|
||||
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
|
||||
amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
|
||||
return -ENODEV;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
|
||||
goto fail_nodev;
|
||||
return -ENODEV;
|
||||
|
||||
if (boot_cpu_data.x86 == 0x17) {
|
||||
/*
|
||||
* For F17h, the Northbridge counters are repurposed as Data
|
||||
* Fabric counters. Also, L3 counters are supported too. The PMUs
|
||||
* are exported based on family as either L2 or L3 and NB or DF.
|
||||
*/
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L3;
|
||||
amd_nb_pmu.name = "amd_df";
|
||||
amd_llc_pmu.name = "amd_l3";
|
||||
format_attr_event_df.show = &event_show_df;
|
||||
format_attr_event_l3.show = &event_show_l3;
|
||||
} else {
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L2;
|
||||
amd_nb_pmu.name = "amd_nb";
|
||||
amd_llc_pmu.name = "amd_l2";
|
||||
format_attr_event_df = format_attr_event;
|
||||
format_attr_event_l3 = format_attr_event;
|
||||
}
|
||||
|
||||
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
|
||||
amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
|
||||
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
|
||||
|
@ -565,7 +551,7 @@ static int __init amd_uncore_init(void)
|
|||
if (ret)
|
||||
goto fail_nb;
|
||||
|
||||
pr_info("perf: AMD NB counters detected\n");
|
||||
pr_info("AMD NB counters detected\n");
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
|
@ -579,7 +565,7 @@ static int __init amd_uncore_init(void)
|
|||
if (ret)
|
||||
goto fail_llc;
|
||||
|
||||
pr_info("perf: AMD LLC counters detected\n");
|
||||
pr_info("AMD LLC counters detected\n");
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
|
@ -615,7 +601,6 @@ static int __init amd_uncore_init(void)
|
|||
if (amd_uncore_nb)
|
||||
free_percpu(amd_uncore_nb);
|
||||
|
||||
fail_nodev:
|
||||
return ret;
|
||||
}
|
||||
device_initcall(amd_uncore_init);
|
||||
|
|
|
@ -63,7 +63,6 @@ struct bts_buffer {
|
|||
unsigned int cur_buf;
|
||||
bool snapshot;
|
||||
local_t data_size;
|
||||
local_t lost;
|
||||
local_t head;
|
||||
unsigned long end;
|
||||
void **data_pages;
|
||||
|
@ -199,7 +198,8 @@ static void bts_update(struct bts_ctx *bts)
|
|||
return;
|
||||
|
||||
if (ds->bts_index >= ds->bts_absolute_maximum)
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(&bts->handle,
|
||||
PERF_AUX_FLAG_TRUNCATED);
|
||||
|
||||
/*
|
||||
* old and head are always in the same physical buffer, so we
|
||||
|
@ -276,7 +276,7 @@ static void bts_event_start(struct perf_event *event, int flags)
|
|||
return;
|
||||
|
||||
fail_end_stop:
|
||||
perf_aux_output_end(&bts->handle, 0, false);
|
||||
perf_aux_output_end(&bts->handle, 0);
|
||||
|
||||
fail_stop:
|
||||
event->hw.state = PERF_HES_STOPPED;
|
||||
|
@ -319,9 +319,8 @@ static void bts_event_stop(struct perf_event *event, int flags)
|
|||
bts->handle.head =
|
||||
local_xchg(&buf->data_size,
|
||||
buf->nr_pages << PAGE_SHIFT);
|
||||
|
||||
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
|
||||
!!local_xchg(&buf->lost, 0));
|
||||
perf_aux_output_end(&bts->handle,
|
||||
local_xchg(&buf->data_size, 0));
|
||||
}
|
||||
|
||||
cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
|
||||
|
@ -484,8 +483,7 @@ int intel_bts_interrupt(void)
|
|||
if (old_head == local_read(&buf->head))
|
||||
return handled;
|
||||
|
||||
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
|
||||
!!local_xchg(&buf->lost, 0));
|
||||
perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0));
|
||||
|
||||
buf = perf_aux_output_begin(&bts->handle, event);
|
||||
if (buf)
|
||||
|
@ -500,7 +498,7 @@ int intel_bts_interrupt(void)
|
|||
* cleared handle::event
|
||||
*/
|
||||
barrier();
|
||||
perf_aux_output_end(&bts->handle, 0, false);
|
||||
perf_aux_output_end(&bts->handle, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1553,6 +1553,27 @@ static __initconst const u64 slm_hw_cache_event_ids
|
|||
},
|
||||
};
|
||||
|
||||
EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c");
|
||||
EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3");
|
||||
/* UOPS_NOT_DELIVERED.ANY */
|
||||
EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c");
|
||||
/* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */
|
||||
EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02");
|
||||
/* UOPS_RETIRED.ANY */
|
||||
EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2");
|
||||
/* UOPS_ISSUED.ANY */
|
||||
EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e");
|
||||
|
||||
static struct attribute *glm_events_attrs[] = {
|
||||
EVENT_PTR(td_total_slots_glm),
|
||||
EVENT_PTR(td_total_slots_scale_glm),
|
||||
EVENT_PTR(td_fetch_bubbles_glm),
|
||||
EVENT_PTR(td_recovery_bubbles_glm),
|
||||
EVENT_PTR(td_slots_issued_glm),
|
||||
EVENT_PTR(td_slots_retired_glm),
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
|
||||
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
|
||||
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
|
||||
|
@ -2130,7 +2151,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
|
|||
* counters from the GLOBAL_STATUS mask and we always process PEBS
|
||||
* events via drain_pebs().
|
||||
*/
|
||||
status &= ~cpuc->pebs_enabled;
|
||||
status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
|
||||
|
||||
/*
|
||||
* PEBS overflow sets bit 62 in the global status register
|
||||
|
@ -3750,6 +3771,7 @@ __init int intel_pmu_init(void)
|
|||
x86_pmu.pebs_prec_dist = true;
|
||||
x86_pmu.lbr_pt_coexist = true;
|
||||
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
|
||||
x86_pmu.cpu_events = glm_events_attrs;
|
||||
pr_cont("Goldmont events, ");
|
||||
break;
|
||||
|
||||
|
|
|
@ -1222,7 +1222,7 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
|
|||
|
||||
/* clear non-PEBS bit and re-check */
|
||||
pebs_status = p->status & cpuc->pebs_enabled;
|
||||
pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
|
||||
pebs_status &= PEBS_COUNTER_MASK;
|
||||
if (pebs_status == (1 << bit))
|
||||
return at;
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include <asm/insn.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/intel_pt.h>
|
||||
#include <asm/intel-family.h>
|
||||
|
||||
#include "../perf_event.h"
|
||||
#include "pt.h"
|
||||
|
@ -98,6 +99,7 @@ static struct attribute_group pt_cap_group = {
|
|||
.name = "caps",
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR(pt, "config:0" );
|
||||
PMU_FORMAT_ATTR(cyc, "config:1" );
|
||||
PMU_FORMAT_ATTR(pwr_evt, "config:4" );
|
||||
PMU_FORMAT_ATTR(fup_on_ptw, "config:5" );
|
||||
|
@ -105,11 +107,13 @@ PMU_FORMAT_ATTR(mtc, "config:9" );
|
|||
PMU_FORMAT_ATTR(tsc, "config:10" );
|
||||
PMU_FORMAT_ATTR(noretcomp, "config:11" );
|
||||
PMU_FORMAT_ATTR(ptw, "config:12" );
|
||||
PMU_FORMAT_ATTR(branch, "config:13" );
|
||||
PMU_FORMAT_ATTR(mtc_period, "config:14-17" );
|
||||
PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" );
|
||||
PMU_FORMAT_ATTR(psb_period, "config:24-27" );
|
||||
|
||||
static struct attribute *pt_formats_attr[] = {
|
||||
&format_attr_pt.attr,
|
||||
&format_attr_cyc.attr,
|
||||
&format_attr_pwr_evt.attr,
|
||||
&format_attr_fup_on_ptw.attr,
|
||||
|
@ -117,6 +121,7 @@ static struct attribute *pt_formats_attr[] = {
|
|||
&format_attr_tsc.attr,
|
||||
&format_attr_noretcomp.attr,
|
||||
&format_attr_ptw.attr,
|
||||
&format_attr_branch.attr,
|
||||
&format_attr_mtc_period.attr,
|
||||
&format_attr_cyc_thresh.attr,
|
||||
&format_attr_psb_period.attr,
|
||||
|
@ -197,6 +202,19 @@ static int __init pt_pmu_hw_init(void)
|
|||
pt_pmu.tsc_art_den = eax;
|
||||
}
|
||||
|
||||
/* model-specific quirks */
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case INTEL_FAM6_BROADWELL_CORE:
|
||||
case INTEL_FAM6_BROADWELL_XEON_D:
|
||||
case INTEL_FAM6_BROADWELL_GT3E:
|
||||
case INTEL_FAM6_BROADWELL_X:
|
||||
/* not setting BRANCH_EN will #GP, erratum BDM106 */
|
||||
pt_pmu.branch_en_always_on = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_VMX)) {
|
||||
/*
|
||||
* Intel SDM, 36.5 "Tracing post-VMXON" says that
|
||||
|
@ -263,8 +281,20 @@ static int __init pt_pmu_hw_init(void)
|
|||
#define RTIT_CTL_PTW (RTIT_CTL_PTW_EN | \
|
||||
RTIT_CTL_FUP_ON_PTW)
|
||||
|
||||
#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN | \
|
||||
/*
|
||||
* Bit 0 (TraceEn) in the attr.config is meaningless as the
|
||||
* corresponding bit in the RTIT_CTL can only be controlled
|
||||
* by the driver; therefore, repurpose it to mean: pass
|
||||
* through the bit that was previously assumed to be always
|
||||
* on for PT, thereby allowing the user to *not* set it if
|
||||
* they so wish. See also pt_event_valid() and pt_config().
|
||||
*/
|
||||
#define RTIT_CTL_PASSTHROUGH RTIT_CTL_TRACEEN
|
||||
|
||||
#define PT_CONFIG_MASK (RTIT_CTL_TRACEEN | \
|
||||
RTIT_CTL_TSC_EN | \
|
||||
RTIT_CTL_DISRETC | \
|
||||
RTIT_CTL_BRANCH_EN | \
|
||||
RTIT_CTL_CYC_PSB | \
|
||||
RTIT_CTL_MTC | \
|
||||
RTIT_CTL_PWR_EVT_EN | \
|
||||
|
@ -332,6 +362,33 @@ static bool pt_event_valid(struct perf_event *event)
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setting bit 0 (TraceEn in RTIT_CTL MSR) in the attr.config
|
||||
* clears the assomption that BranchEn must always be enabled,
|
||||
* as was the case with the first implementation of PT.
|
||||
* If this bit is not set, the legacy behavior is preserved
|
||||
* for compatibility with the older userspace.
|
||||
*
|
||||
* Re-using bit 0 for this purpose is fine because it is never
|
||||
* directly set by the user; previous attempts at setting it in
|
||||
* the attr.config resulted in -EINVAL.
|
||||
*/
|
||||
if (config & RTIT_CTL_PASSTHROUGH) {
|
||||
/*
|
||||
* Disallow not setting BRANCH_EN where BRANCH_EN is
|
||||
* always required.
|
||||
*/
|
||||
if (pt_pmu.branch_en_always_on &&
|
||||
!(config & RTIT_CTL_BRANCH_EN))
|
||||
return false;
|
||||
} else {
|
||||
/*
|
||||
* Disallow BRANCH_EN without the PASSTHROUGH.
|
||||
*/
|
||||
if (config & RTIT_CTL_BRANCH_EN)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -411,6 +468,7 @@ static u64 pt_config_filters(struct perf_event *event)
|
|||
|
||||
static void pt_config(struct perf_event *event)
|
||||
{
|
||||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
u64 reg;
|
||||
|
||||
if (!event->hw.itrace_started) {
|
||||
|
@ -419,7 +477,20 @@ static void pt_config(struct perf_event *event)
|
|||
}
|
||||
|
||||
reg = pt_config_filters(event);
|
||||
reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
|
||||
reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
|
||||
|
||||
/*
|
||||
* Previously, we had BRANCH_EN on by default, but now that PT has
|
||||
* grown features outside of branch tracing, it is useful to allow
|
||||
* the user to disable it. Setting bit 0 in the event's attr.config
|
||||
* allows BRANCH_EN to pass through instead of being always on. See
|
||||
* also the comment in pt_event_valid().
|
||||
*/
|
||||
if (event->attr.config & BIT(0)) {
|
||||
reg |= event->attr.config & RTIT_CTL_BRANCH_EN;
|
||||
} else {
|
||||
reg |= RTIT_CTL_BRANCH_EN;
|
||||
}
|
||||
|
||||
if (!event->attr.exclude_kernel)
|
||||
reg |= RTIT_CTL_OS;
|
||||
|
@ -429,11 +500,15 @@ static void pt_config(struct perf_event *event)
|
|||
reg |= (event->attr.config & PT_CONFIG_MASK);
|
||||
|
||||
event->hw.config = reg;
|
||||
wrmsrl(MSR_IA32_RTIT_CTL, reg);
|
||||
if (READ_ONCE(pt->vmx_on))
|
||||
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
|
||||
else
|
||||
wrmsrl(MSR_IA32_RTIT_CTL, reg);
|
||||
}
|
||||
|
||||
static void pt_config_stop(struct perf_event *event)
|
||||
{
|
||||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
u64 ctl = READ_ONCE(event->hw.config);
|
||||
|
||||
/* may be already stopped by a PMI */
|
||||
|
@ -441,7 +516,8 @@ static void pt_config_stop(struct perf_event *event)
|
|||
return;
|
||||
|
||||
ctl &= ~RTIT_CTL_TRACEEN;
|
||||
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
|
||||
if (!READ_ONCE(pt->vmx_on))
|
||||
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
|
||||
|
||||
WRITE_ONCE(event->hw.config, ctl);
|
||||
|
||||
|
@ -753,7 +829,8 @@ static void pt_handle_status(struct pt *pt)
|
|||
*/
|
||||
if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
|
||||
buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(&pt->handle,
|
||||
PERF_AUX_FLAG_TRUNCATED);
|
||||
advance++;
|
||||
}
|
||||
}
|
||||
|
@ -846,8 +923,10 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
|
|||
|
||||
/* can't stop in the middle of an output region */
|
||||
if (buf->output_off + handle->size + 1 <
|
||||
sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
|
||||
sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
|
||||
|
@ -1171,12 +1250,6 @@ void intel_pt_interrupt(void)
|
|||
if (!READ_ONCE(pt->handle_nmi))
|
||||
return;
|
||||
|
||||
/*
|
||||
* If VMX is on and PT does not support it, don't touch anything.
|
||||
*/
|
||||
if (READ_ONCE(pt->vmx_on))
|
||||
return;
|
||||
|
||||
if (!event)
|
||||
return;
|
||||
|
||||
|
@ -1192,8 +1265,7 @@ void intel_pt_interrupt(void)
|
|||
|
||||
pt_update_head(pt);
|
||||
|
||||
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
|
||||
local_xchg(&buf->lost, 0));
|
||||
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
|
||||
|
||||
if (!event->hw.state) {
|
||||
int ret;
|
||||
|
@ -1208,7 +1280,7 @@ void intel_pt_interrupt(void)
|
|||
/* snapshot counters don't use PMI, so it's safe */
|
||||
ret = pt_buffer_reset_markers(buf, &pt->handle);
|
||||
if (ret) {
|
||||
perf_aux_output_end(&pt->handle, 0, true);
|
||||
perf_aux_output_end(&pt->handle, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1237,12 +1309,19 @@ void intel_pt_handle_vmx(int on)
|
|||
local_irq_save(flags);
|
||||
WRITE_ONCE(pt->vmx_on, on);
|
||||
|
||||
if (on) {
|
||||
/* prevent pt_config_stop() from writing RTIT_CTL */
|
||||
event = pt->handle.event;
|
||||
if (event)
|
||||
event->hw.config = 0;
|
||||
}
|
||||
/*
|
||||
* If an AUX transaction is in progress, it will contain
|
||||
* gap(s), so flag it PARTIAL to inform the user.
|
||||
*/
|
||||
event = pt->handle.event;
|
||||
if (event)
|
||||
perf_aux_output_flag(&pt->handle,
|
||||
PERF_AUX_FLAG_PARTIAL);
|
||||
|
||||
/* Turn PTs back on */
|
||||
if (!on && event)
|
||||
wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
|
||||
|
@ -1257,9 +1336,6 @@ static void pt_event_start(struct perf_event *event, int mode)
|
|||
struct pt *pt = this_cpu_ptr(&pt_ctx);
|
||||
struct pt_buffer *buf;
|
||||
|
||||
if (READ_ONCE(pt->vmx_on))
|
||||
return;
|
||||
|
||||
buf = perf_aux_output_begin(&pt->handle, event);
|
||||
if (!buf)
|
||||
goto fail_stop;
|
||||
|
@ -1280,7 +1356,7 @@ static void pt_event_start(struct perf_event *event, int mode)
|
|||
return;
|
||||
|
||||
fail_end_stop:
|
||||
perf_aux_output_end(&pt->handle, 0, true);
|
||||
perf_aux_output_end(&pt->handle, 0);
|
||||
fail_stop:
|
||||
hwc->state = PERF_HES_STOPPED;
|
||||
}
|
||||
|
@ -1321,8 +1397,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
|
|||
pt->handle.head =
|
||||
local_xchg(&buf->data_size,
|
||||
buf->nr_pages << PAGE_SHIFT);
|
||||
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
|
||||
local_xchg(&buf->lost, 0));
|
||||
perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -110,6 +110,7 @@ struct pt_pmu {
|
|||
struct pmu pmu;
|
||||
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
|
||||
bool vmx;
|
||||
bool branch_en_always_on;
|
||||
unsigned long max_nonturbo_ratio;
|
||||
unsigned int tsc_art_num;
|
||||
unsigned int tsc_art_den;
|
||||
|
@ -143,7 +144,6 @@ struct pt_buffer {
|
|||
size_t output_off;
|
||||
unsigned long nr_pages;
|
||||
local_t data_size;
|
||||
local_t lost;
|
||||
local64_t head;
|
||||
bool snapshot;
|
||||
unsigned long stop_pos, intr_pos;
|
||||
|
|
|
@ -79,6 +79,7 @@ struct amd_nb {
|
|||
|
||||
/* The maximal number of PEBS events: */
|
||||
#define MAX_PEBS_EVENTS 8
|
||||
#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
|
||||
|
||||
/*
|
||||
* Flags PEBS can handle without an PMI.
|
||||
|
|
|
@ -72,14 +72,13 @@ struct arch_specific_insn {
|
|||
/* copy of the original instruction */
|
||||
kprobe_opcode_t *insn;
|
||||
/*
|
||||
* boostable = -1: This instruction type is not boostable.
|
||||
* boostable = 0: This instruction type is boostable.
|
||||
* boostable = 1: This instruction has been boosted: we have
|
||||
* boostable = false: This instruction type is not boostable.
|
||||
* boostable = true: This instruction has been boosted: we have
|
||||
* added a relative jump after the instruction copy in insn,
|
||||
* so no single-step and fixup are needed (unless there's
|
||||
* a post_handler or break_handler).
|
||||
*/
|
||||
int boostable;
|
||||
bool boostable;
|
||||
bool if_modifier;
|
||||
};
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@
|
|||
#endif
|
||||
|
||||
/* Ensure if the instruction can be boostable */
|
||||
extern int can_boost(kprobe_opcode_t *instruction, void *addr);
|
||||
extern int can_boost(struct insn *insn, void *orig_addr);
|
||||
/* Recover instruction if given address is probed */
|
||||
extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
|
||||
unsigned long addr);
|
||||
|
@ -75,7 +75,7 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
|
|||
* Copy an instruction and adjust the displacement if the instruction
|
||||
* uses the %rip-relative addressing mode.
|
||||
*/
|
||||
extern int __copy_instruction(u8 *dest, u8 *src);
|
||||
extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn);
|
||||
|
||||
/* Generate a relative-jump/call instruction */
|
||||
extern void synthesize_reljump(void *from, void *to);
|
||||
|
|
|
@ -164,42 +164,38 @@ static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
|
|||
NOKPROBE_SYMBOL(skip_prefixes);
|
||||
|
||||
/*
|
||||
* Returns non-zero if opcode is boostable.
|
||||
* Returns non-zero if INSN is boostable.
|
||||
* RIP relative instructions are adjusted at copying time in 64 bits mode
|
||||
*/
|
||||
int can_boost(kprobe_opcode_t *opcodes, void *addr)
|
||||
int can_boost(struct insn *insn, void *addr)
|
||||
{
|
||||
kprobe_opcode_t opcode;
|
||||
kprobe_opcode_t *orig_opcodes = opcodes;
|
||||
|
||||
if (search_exception_tables((unsigned long)addr))
|
||||
return 0; /* Page fault may occur on this address. */
|
||||
|
||||
retry:
|
||||
if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
|
||||
return 0;
|
||||
opcode = *(opcodes++);
|
||||
|
||||
/* 2nd-byte opcode */
|
||||
if (opcode == 0x0f) {
|
||||
if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
|
||||
return 0;
|
||||
return test_bit(*opcodes,
|
||||
if (insn->opcode.nbytes == 2)
|
||||
return test_bit(insn->opcode.bytes[1],
|
||||
(unsigned long *)twobyte_is_boostable);
|
||||
}
|
||||
|
||||
if (insn->opcode.nbytes != 1)
|
||||
return 0;
|
||||
|
||||
/* Can't boost Address-size override prefix */
|
||||
if (unlikely(inat_is_address_size_prefix(insn->attr)))
|
||||
return 0;
|
||||
|
||||
opcode = insn->opcode.bytes[0];
|
||||
|
||||
switch (opcode & 0xf0) {
|
||||
#ifdef CONFIG_X86_64
|
||||
case 0x40:
|
||||
goto retry; /* REX prefix is boostable */
|
||||
#endif
|
||||
case 0x60:
|
||||
if (0x63 < opcode && opcode < 0x67)
|
||||
goto retry; /* prefixes */
|
||||
/* can't boost Address-size override and bound */
|
||||
return (opcode != 0x62 && opcode != 0x67);
|
||||
/* can't boost "bound" */
|
||||
return (opcode != 0x62);
|
||||
case 0x70:
|
||||
return 0; /* can't boost conditional jump */
|
||||
case 0x90:
|
||||
return opcode != 0x9a; /* can't boost call far */
|
||||
case 0xc0:
|
||||
/* can't boost software-interruptions */
|
||||
return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
|
||||
|
@ -210,14 +206,9 @@ int can_boost(kprobe_opcode_t *opcodes, void *addr)
|
|||
/* can boost in/out and absolute jmps */
|
||||
return ((opcode & 0x04) || opcode == 0xea);
|
||||
case 0xf0:
|
||||
if ((opcode & 0x0c) == 0 && opcode != 0xf1)
|
||||
goto retry; /* lock/rep(ne) prefix */
|
||||
/* clear and set flags are boostable */
|
||||
return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
|
||||
default:
|
||||
/* segment override prefixes are boostable */
|
||||
if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
|
||||
goto retry; /* prefixes */
|
||||
/* CS override prefix and call are not boostable */
|
||||
return (opcode != 0x2e && opcode != 0x9a);
|
||||
}
|
||||
|
@ -264,7 +255,10 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
|
|||
* Fortunately, we know that the original code is the ideal 5-byte
|
||||
* long NOP.
|
||||
*/
|
||||
memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
|
||||
if (probe_kernel_read(buf, (void *)addr,
|
||||
MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
|
||||
return 0UL;
|
||||
|
||||
if (faddr)
|
||||
memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
|
||||
else
|
||||
|
@ -276,7 +270,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
|
|||
* Recover the probed instruction at addr for further analysis.
|
||||
* Caller must lock kprobes by kprobe_mutex, or disable preemption
|
||||
* for preventing to release referencing kprobes.
|
||||
* Returns zero if the instruction can not get recovered.
|
||||
* Returns zero if the instruction can not get recovered (or access failed).
|
||||
*/
|
||||
unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
|
||||
{
|
||||
|
@ -348,37 +342,36 @@ static int is_IF_modifier(kprobe_opcode_t *insn)
|
|||
}
|
||||
|
||||
/*
|
||||
* Copy an instruction and adjust the displacement if the instruction
|
||||
* uses the %rip-relative addressing mode.
|
||||
* If it does, Return the address of the 32-bit displacement word.
|
||||
* If not, return null.
|
||||
* Only applicable to 64-bit x86.
|
||||
* Copy an instruction with recovering modified instruction by kprobes
|
||||
* and adjust the displacement if the instruction uses the %rip-relative
|
||||
* addressing mode.
|
||||
* This returns the length of copied instruction, or 0 if it has an error.
|
||||
*/
|
||||
int __copy_instruction(u8 *dest, u8 *src)
|
||||
int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
|
||||
{
|
||||
struct insn insn;
|
||||
kprobe_opcode_t buf[MAX_INSN_SIZE];
|
||||
int length;
|
||||
unsigned long recovered_insn =
|
||||
recover_probed_instruction(buf, (unsigned long)src);
|
||||
|
||||
if (!recovered_insn)
|
||||
if (!recovered_insn || !insn)
|
||||
return 0;
|
||||
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
|
||||
insn_get_length(&insn);
|
||||
length = insn.length;
|
||||
|
||||
/* This can access kernel text if given address is not recovered */
|
||||
if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE))
|
||||
return 0;
|
||||
|
||||
kernel_insn_init(insn, dest, MAX_INSN_SIZE);
|
||||
insn_get_length(insn);
|
||||
|
||||
/* Another subsystem puts a breakpoint, failed to recover */
|
||||
if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
|
||||
if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
|
||||
return 0;
|
||||
memcpy(dest, insn.kaddr, length);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (insn_rip_relative(&insn)) {
|
||||
/* Only x86_64 has RIP relative instructions */
|
||||
if (insn_rip_relative(insn)) {
|
||||
s64 newdisp;
|
||||
u8 *disp;
|
||||
kernel_insn_init(&insn, dest, length);
|
||||
insn_get_displacement(&insn);
|
||||
/*
|
||||
* The copied instruction uses the %rip-relative addressing
|
||||
* mode. Adjust the displacement for the difference between
|
||||
|
@ -391,36 +384,57 @@ int __copy_instruction(u8 *dest, u8 *src)
|
|||
* extension of the original signed 32-bit displacement would
|
||||
* have given.
|
||||
*/
|
||||
newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
|
||||
newdisp = (u8 *) src + (s64) insn->displacement.value
|
||||
- (u8 *) dest;
|
||||
if ((s64) (s32) newdisp != newdisp) {
|
||||
pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
|
||||
pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", src, dest, insn.displacement.value);
|
||||
pr_err("\tSrc: %p, Dest: %p, old disp: %x\n",
|
||||
src, dest, insn->displacement.value);
|
||||
return 0;
|
||||
}
|
||||
disp = (u8 *) dest + insn_offset_displacement(&insn);
|
||||
disp = (u8 *) dest + insn_offset_displacement(insn);
|
||||
*(s32 *) disp = (s32) newdisp;
|
||||
}
|
||||
#endif
|
||||
return length;
|
||||
return insn->length;
|
||||
}
|
||||
|
||||
/* Prepare reljump right after instruction to boost */
|
||||
static void prepare_boost(struct kprobe *p, struct insn *insn)
|
||||
{
|
||||
if (can_boost(insn, p->addr) &&
|
||||
MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) {
|
||||
/*
|
||||
* These instructions can be executed directly if it
|
||||
* jumps back to correct address.
|
||||
*/
|
||||
synthesize_reljump(p->ainsn.insn + insn->length,
|
||||
p->addr + insn->length);
|
||||
p->ainsn.boostable = true;
|
||||
} else {
|
||||
p->ainsn.boostable = false;
|
||||
}
|
||||
}
|
||||
|
||||
static int arch_copy_kprobe(struct kprobe *p)
|
||||
{
|
||||
int ret;
|
||||
struct insn insn;
|
||||
int len;
|
||||
|
||||
set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
|
||||
|
||||
/* Copy an instruction with recovering if other optprobe modifies it.*/
|
||||
ret = __copy_instruction(p->ainsn.insn, p->addr);
|
||||
if (!ret)
|
||||
len = __copy_instruction(p->ainsn.insn, p->addr, &insn);
|
||||
if (!len)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* __copy_instruction can modify the displacement of the instruction,
|
||||
* but it doesn't affect boostable check.
|
||||
*/
|
||||
if (can_boost(p->ainsn.insn, p->addr))
|
||||
p->ainsn.boostable = 0;
|
||||
else
|
||||
p->ainsn.boostable = -1;
|
||||
prepare_boost(p, &insn);
|
||||
|
||||
set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
|
||||
|
||||
/* Check whether the instruction modifies Interrupt Flag or not */
|
||||
p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn);
|
||||
|
@ -459,7 +473,7 @@ void arch_disarm_kprobe(struct kprobe *p)
|
|||
void arch_remove_kprobe(struct kprobe *p)
|
||||
{
|
||||
if (p->ainsn.insn) {
|
||||
free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
|
||||
free_insn_slot(p->ainsn.insn, p->ainsn.boostable);
|
||||
p->ainsn.insn = NULL;
|
||||
}
|
||||
}
|
||||
|
@ -531,7 +545,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
|
|||
return;
|
||||
|
||||
#if !defined(CONFIG_PREEMPT)
|
||||
if (p->ainsn.boostable == 1 && !p->post_handler) {
|
||||
if (p->ainsn.boostable && !p->post_handler) {
|
||||
/* Boost up -- we can execute copied instructions directly */
|
||||
if (!reenter)
|
||||
reset_current_kprobe();
|
||||
|
@ -851,7 +865,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
|
|||
case 0xcf:
|
||||
case 0xea: /* jmp absolute -- ip is correct */
|
||||
/* ip is already adjusted, no more changes required */
|
||||
p->ainsn.boostable = 1;
|
||||
p->ainsn.boostable = true;
|
||||
goto no_change;
|
||||
case 0xe8: /* call relative - Fix return addr */
|
||||
*tos = orig_ip + (*tos - copy_ip);
|
||||
|
@ -876,28 +890,13 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
|
|||
* jmp near and far, absolute indirect
|
||||
* ip is correct. And this is boostable
|
||||
*/
|
||||
p->ainsn.boostable = 1;
|
||||
p->ainsn.boostable = true;
|
||||
goto no_change;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (p->ainsn.boostable == 0) {
|
||||
if ((regs->ip > copy_ip) &&
|
||||
(regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) {
|
||||
/*
|
||||
* These instructions can be executed directly if it
|
||||
* jumps back to correct address.
|
||||
*/
|
||||
synthesize_reljump((void *)regs->ip,
|
||||
(void *)orig_ip + (regs->ip - copy_ip));
|
||||
p->ainsn.boostable = 1;
|
||||
} else {
|
||||
p->ainsn.boostable = -1;
|
||||
}
|
||||
}
|
||||
|
||||
regs->ip += orig_ip - copy_ip;
|
||||
|
||||
no_change:
|
||||
|
|
|
@ -94,6 +94,6 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler);
|
|||
int arch_prepare_kprobe_ftrace(struct kprobe *p)
|
||||
{
|
||||
p->ainsn.insn = NULL;
|
||||
p->ainsn.boostable = -1;
|
||||
p->ainsn.boostable = false;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -65,7 +65,10 @@ unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
|
|||
* overwritten by jump destination address. In this case, original
|
||||
* bytes must be recovered from op->optinsn.copied_insn buffer.
|
||||
*/
|
||||
memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
|
||||
if (probe_kernel_read(buf, (void *)addr,
|
||||
MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
|
||||
return 0UL;
|
||||
|
||||
if (addr == (unsigned long)kp->addr) {
|
||||
buf[0] = kp->opcode;
|
||||
memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
|
||||
|
@ -174,11 +177,12 @@ NOKPROBE_SYMBOL(optimized_callback);
|
|||
|
||||
static int copy_optimized_instructions(u8 *dest, u8 *src)
|
||||
{
|
||||
struct insn insn;
|
||||
int len = 0, ret;
|
||||
|
||||
while (len < RELATIVEJUMP_SIZE) {
|
||||
ret = __copy_instruction(dest + len, src + len);
|
||||
if (!ret || !can_boost(dest + len, src + len))
|
||||
ret = __copy_instruction(dest + len, src + len, &insn);
|
||||
if (!ret || !can_boost(&insn, src + len))
|
||||
return -EINVAL;
|
||||
len += ret;
|
||||
}
|
||||
|
@ -350,6 +354,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
|
|||
}
|
||||
|
||||
buf = (u8 *)op->optinsn.insn;
|
||||
set_memory_rw((unsigned long)buf & PAGE_MASK, 1);
|
||||
|
||||
/* Copy instructions into the out-of-line buffer */
|
||||
ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
|
||||
|
@ -372,6 +377,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
|
|||
synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
|
||||
(u8 *)op->kp.addr + op->optinsn.size);
|
||||
|
||||
set_memory_ro((unsigned long)buf & PAGE_MASK, 1);
|
||||
|
||||
flush_icache_range((unsigned long) buf,
|
||||
(unsigned long) buf + TMPL_END_IDX +
|
||||
op->optinsn.size + RELATIVEJUMP_SIZE);
|
||||
|
|
|
@ -321,7 +321,7 @@ static int etb_set_buffer(struct coresight_device *csdev,
|
|||
|
||||
static unsigned long etb_reset_buffer(struct coresight_device *csdev,
|
||||
struct perf_output_handle *handle,
|
||||
void *sink_config, bool *lost)
|
||||
void *sink_config)
|
||||
{
|
||||
unsigned long size = 0;
|
||||
struct cs_buffers *buf = sink_config;
|
||||
|
@ -343,7 +343,6 @@ static unsigned long etb_reset_buffer(struct coresight_device *csdev,
|
|||
* resetting parameters here and squaring off with the ring
|
||||
* buffer API in the tracer PMU is fine.
|
||||
*/
|
||||
*lost = !!local_xchg(&buf->lost, 0);
|
||||
size = local_xchg(&buf->data_size, 0);
|
||||
}
|
||||
|
||||
|
@ -385,7 +384,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
|
|||
(unsigned long)write_ptr);
|
||||
|
||||
write_ptr &= ~(ETB_FRAME_SIZE_WORDS - 1);
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -396,7 +395,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
|
|||
*/
|
||||
status = readl_relaxed(drvdata->base + ETB_STATUS_REG);
|
||||
if (status & ETB_STATUS_RAM_FULL) {
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
to_read = capacity;
|
||||
read_ptr = write_ptr;
|
||||
} else {
|
||||
|
@ -429,7 +428,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
|
|||
if (read_ptr > (drvdata->buffer_depth - 1))
|
||||
read_ptr -= drvdata->buffer_depth;
|
||||
/* let the decoder know we've skipped ahead */
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
}
|
||||
|
||||
/* finally tell HW where we want to start reading from */
|
||||
|
|
|
@ -302,7 +302,8 @@ static void etm_event_start(struct perf_event *event, int flags)
|
|||
return;
|
||||
|
||||
fail_end_stop:
|
||||
perf_aux_output_end(handle, 0, true);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
perf_aux_output_end(handle, 0);
|
||||
fail:
|
||||
event->hw.state = PERF_HES_STOPPED;
|
||||
goto out;
|
||||
|
@ -310,7 +311,6 @@ static void etm_event_start(struct perf_event *event, int flags)
|
|||
|
||||
static void etm_event_stop(struct perf_event *event, int mode)
|
||||
{
|
||||
bool lost;
|
||||
int cpu = smp_processor_id();
|
||||
unsigned long size;
|
||||
struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
|
||||
|
@ -348,10 +348,9 @@ static void etm_event_stop(struct perf_event *event, int mode)
|
|||
return;
|
||||
|
||||
size = sink_ops(sink)->reset_buffer(sink, handle,
|
||||
event_data->snk_config,
|
||||
&lost);
|
||||
event_data->snk_config);
|
||||
|
||||
perf_aux_output_end(handle, size, lost);
|
||||
perf_aux_output_end(handle, size);
|
||||
}
|
||||
|
||||
/* Disabling the path make its elements available to other sessions */
|
||||
|
|
|
@ -76,7 +76,6 @@ enum cs_mode {
|
|||
* @nr_pages: max number of pages granted to us
|
||||
* @offset: offset within the current buffer
|
||||
* @data_size: how much we collected in this run
|
||||
* @lost: other than zero if we had a HW buffer wrap around
|
||||
* @snapshot: is this run in snapshot mode
|
||||
* @data_pages: a handle the ring buffer
|
||||
*/
|
||||
|
@ -85,7 +84,6 @@ struct cs_buffers {
|
|||
unsigned int nr_pages;
|
||||
unsigned long offset;
|
||||
local_t data_size;
|
||||
local_t lost;
|
||||
bool snapshot;
|
||||
void **data_pages;
|
||||
};
|
||||
|
|
|
@ -329,7 +329,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev,
|
|||
|
||||
static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
|
||||
struct perf_output_handle *handle,
|
||||
void *sink_config, bool *lost)
|
||||
void *sink_config)
|
||||
{
|
||||
long size = 0;
|
||||
struct cs_buffers *buf = sink_config;
|
||||
|
@ -350,7 +350,6 @@ static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
|
|||
* resetting parameters here and squaring off with the ring
|
||||
* buffer API in the tracer PMU is fine.
|
||||
*/
|
||||
*lost = !!local_xchg(&buf->lost, 0);
|
||||
size = local_xchg(&buf->data_size, 0);
|
||||
}
|
||||
|
||||
|
@ -389,7 +388,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
|
|||
*/
|
||||
status = readl_relaxed(drvdata->base + TMC_STS);
|
||||
if (status & TMC_STS_FULL) {
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
to_read = drvdata->size;
|
||||
} else {
|
||||
to_read = CIRC_CNT(write_ptr, read_ptr, drvdata->size);
|
||||
|
@ -434,7 +433,7 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
|
|||
read_ptr -= drvdata->size;
|
||||
/* Tell the HW */
|
||||
writel_relaxed(read_ptr, drvdata->base + TMC_RRP);
|
||||
local_inc(&buf->lost);
|
||||
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
|
||||
}
|
||||
|
||||
cur = buf->cur;
|
||||
|
|
|
@ -1234,7 +1234,7 @@ static void __domain_flush_pages(struct protection_domain *domain,
|
|||
|
||||
build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
|
||||
|
||||
for (i = 0; i < amd_iommus_present; ++i) {
|
||||
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
|
||||
if (!domain->dev_iommu[i])
|
||||
continue;
|
||||
|
||||
|
@ -1278,7 +1278,7 @@ static void domain_flush_complete(struct protection_domain *domain)
|
|||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < amd_iommus_present; ++i) {
|
||||
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
|
||||
if (domain && !domain->dev_iommu[i])
|
||||
continue;
|
||||
|
||||
|
@ -3363,7 +3363,7 @@ static int __flush_pasid(struct protection_domain *domain, int pasid,
|
|||
* IOMMU TLB needs to be flushed before Device TLB to
|
||||
* prevent device TLB refill from IOMMU TLB
|
||||
*/
|
||||
for (i = 0; i < amd_iommus_present; ++i) {
|
||||
for (i = 0; i < amd_iommu_get_num_iommus(); ++i) {
|
||||
if (domain->dev_iommu[i] == 0)
|
||||
continue;
|
||||
|
||||
|
|
|
@ -167,7 +167,9 @@ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
|
|||
|
||||
/* Array to assign indices to IOMMUs*/
|
||||
struct amd_iommu *amd_iommus[MAX_IOMMUS];
|
||||
int amd_iommus_present;
|
||||
|
||||
/* Number of IOMMUs present in the system */
|
||||
static int amd_iommus_present;
|
||||
|
||||
/* IOMMUs have a non-present cache? */
|
||||
bool amd_iommu_np_cache __read_mostly;
|
||||
|
@ -254,10 +256,6 @@ static int amd_iommu_enable_interrupts(void);
|
|||
static int __init iommu_go_to_state(enum iommu_init_state state);
|
||||
static void init_device_table_dma(void);
|
||||
|
||||
static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
|
||||
u8 bank, u8 cntr, u8 fxn,
|
||||
u64 *value, bool is_write);
|
||||
|
||||
static inline void update_last_devid(u16 devid)
|
||||
{
|
||||
if (devid > amd_iommu_last_bdf)
|
||||
|
@ -272,6 +270,11 @@ static inline unsigned long tbl_size(int entry_size)
|
|||
return 1UL << shift;
|
||||
}
|
||||
|
||||
int amd_iommu_get_num_iommus(void)
|
||||
{
|
||||
return amd_iommus_present;
|
||||
}
|
||||
|
||||
/* Access to l1 and l2 indexed register spaces */
|
||||
|
||||
static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
|
||||
|
@ -1336,7 +1339,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
|
|||
|
||||
/* Add IOMMU to internal data structures */
|
||||
list_add_tail(&iommu->list, &amd_iommu_list);
|
||||
iommu->index = amd_iommus_present++;
|
||||
iommu->index = amd_iommus_present++;
|
||||
|
||||
if (unlikely(iommu->index >= MAX_IOMMUS)) {
|
||||
WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
|
||||
|
@ -1477,6 +1480,8 @@ static int __init init_iommu_all(struct acpi_table_header *table)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value, bool is_write);
|
||||
|
||||
static void init_iommu_perf_ctr(struct amd_iommu *iommu)
|
||||
{
|
||||
|
@ -1488,8 +1493,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
|
|||
amd_iommu_pc_present = true;
|
||||
|
||||
/* Check if the performance counters can be written to */
|
||||
if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) ||
|
||||
(0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) ||
|
||||
if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
|
||||
(iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
|
||||
(val != val2)) {
|
||||
pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
|
||||
amd_iommu_pc_present = false;
|
||||
|
@ -2711,6 +2716,18 @@ bool amd_iommu_v2_supported(void)
|
|||
}
|
||||
EXPORT_SYMBOL(amd_iommu_v2_supported);
|
||||
|
||||
struct amd_iommu *get_amd_iommu(unsigned int idx)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
struct amd_iommu *iommu;
|
||||
|
||||
for_each_iommu(iommu)
|
||||
if (i++ == idx)
|
||||
return iommu;
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(get_amd_iommu);
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* IOMMU EFR Performance Counter support functionality. This code allows
|
||||
|
@ -2718,17 +2735,14 @@ EXPORT_SYMBOL(amd_iommu_v2_supported);
|
|||
*
|
||||
****************************************************************************/
|
||||
|
||||
u8 amd_iommu_pc_get_max_banks(u16 devid)
|
||||
u8 amd_iommu_pc_get_max_banks(unsigned int idx)
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
u8 ret = 0;
|
||||
struct amd_iommu *iommu = get_amd_iommu(idx);
|
||||
|
||||
/* locate the iommu governing the devid */
|
||||
iommu = amd_iommu_rlookup_table[devid];
|
||||
if (iommu)
|
||||
ret = iommu->max_banks;
|
||||
return iommu->max_banks;
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
|
||||
|
||||
|
@ -2738,62 +2752,69 @@ bool amd_iommu_pc_supported(void)
|
|||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_supported);
|
||||
|
||||
u8 amd_iommu_pc_get_max_counters(u16 devid)
|
||||
u8 amd_iommu_pc_get_max_counters(unsigned int idx)
|
||||
{
|
||||
struct amd_iommu *iommu;
|
||||
u8 ret = 0;
|
||||
struct amd_iommu *iommu = get_amd_iommu(idx);
|
||||
|
||||
/* locate the iommu governing the devid */
|
||||
iommu = amd_iommu_rlookup_table[devid];
|
||||
if (iommu)
|
||||
ret = iommu->max_counters;
|
||||
return iommu->max_counters;
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
|
||||
|
||||
static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
|
||||
u8 bank, u8 cntr, u8 fxn,
|
||||
u64 *value, bool is_write)
|
||||
static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
|
||||
u8 fxn, u64 *value, bool is_write)
|
||||
{
|
||||
u32 offset;
|
||||
u32 max_offset_lim;
|
||||
|
||||
/* Check for valid iommu and pc register indexing */
|
||||
if (WARN_ON((fxn > 0x28) || (fxn & 7)))
|
||||
/* Make sure the IOMMU PC resource is available */
|
||||
if (!amd_iommu_pc_present)
|
||||
return -ENODEV;
|
||||
|
||||
offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn);
|
||||
/* Check for valid iommu and pc register indexing */
|
||||
if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
|
||||
return -ENODEV;
|
||||
|
||||
offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
|
||||
|
||||
/* Limit the offset to the hw defined mmio region aperture */
|
||||
max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) |
|
||||
max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
|
||||
(iommu->max_counters << 8) | 0x28);
|
||||
if ((offset < MMIO_CNTR_REG_OFFSET) ||
|
||||
(offset > max_offset_lim))
|
||||
return -EINVAL;
|
||||
|
||||
if (is_write) {
|
||||
writel((u32)*value, iommu->mmio_base + offset);
|
||||
writel((*value >> 32), iommu->mmio_base + offset + 4);
|
||||
u64 val = *value & GENMASK_ULL(47, 0);
|
||||
|
||||
writel((u32)val, iommu->mmio_base + offset);
|
||||
writel((val >> 32), iommu->mmio_base + offset + 4);
|
||||
} else {
|
||||
*value = readl(iommu->mmio_base + offset + 4);
|
||||
*value <<= 32;
|
||||
*value = readl(iommu->mmio_base + offset);
|
||||
*value |= readl(iommu->mmio_base + offset);
|
||||
*value &= GENMASK_ULL(47, 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
|
||||
|
||||
int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
|
||||
u64 *value, bool is_write)
|
||||
int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
|
||||
{
|
||||
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
|
||||
if (!iommu)
|
||||
return -EINVAL;
|
||||
|
||||
/* Make sure the IOMMU PC resource is available */
|
||||
if (!amd_iommu_pc_present || iommu == NULL)
|
||||
return -ENODEV;
|
||||
|
||||
return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn,
|
||||
value, is_write);
|
||||
return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_get_reg);
|
||||
|
||||
int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
|
||||
{
|
||||
if (!iommu)
|
||||
return -EINVAL;
|
||||
|
||||
return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
|
||||
}
|
||||
EXPORT_SYMBOL(amd_iommu_pc_set_reg);
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
#include "amd_iommu_types.h"
|
||||
|
||||
extern int amd_iommu_get_num_iommus(void);
|
||||
extern int amd_iommu_init_dma_ops(void);
|
||||
extern int amd_iommu_init_passthrough(void);
|
||||
extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
|
||||
|
@ -56,13 +57,6 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
|
|||
extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
|
||||
extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
|
||||
|
||||
/* IOMMU Performance Counter functions */
|
||||
extern bool amd_iommu_pc_supported(void);
|
||||
extern u8 amd_iommu_pc_get_max_banks(u16 devid);
|
||||
extern u8 amd_iommu_pc_get_max_counters(u16 devid);
|
||||
extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
|
||||
u64 *value, bool is_write);
|
||||
|
||||
#ifdef CONFIG_IRQ_REMAP
|
||||
extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
|
||||
#else
|
||||
|
|
|
@ -611,9 +611,6 @@ extern struct list_head amd_iommu_list;
|
|||
*/
|
||||
extern struct amd_iommu *amd_iommus[MAX_IOMMUS];
|
||||
|
||||
/* Number of IOMMUs present in the system */
|
||||
extern int amd_iommus_present;
|
||||
|
||||
/*
|
||||
* Declarations for the global list of all protection domains
|
||||
*/
|
||||
|
|
|
@ -201,7 +201,7 @@ struct coresight_ops_sink {
|
|||
void *sink_config);
|
||||
unsigned long (*reset_buffer)(struct coresight_device *csdev,
|
||||
struct perf_output_handle *handle,
|
||||
void *sink_config, bool *lost);
|
||||
void *sink_config);
|
||||
void (*update_buffer)(struct coresight_device *csdev,
|
||||
struct perf_output_handle *handle,
|
||||
void *sink_config);
|
||||
|
|
|
@ -267,6 +267,8 @@ extern int arch_init_kprobes(void);
|
|||
extern void show_registers(struct pt_regs *regs);
|
||||
extern void kprobes_inc_nmissed_count(struct kprobe *p);
|
||||
extern bool arch_within_kprobe_blacklist(unsigned long addr);
|
||||
extern bool arch_function_offset_within_entry(unsigned long offset);
|
||||
extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
|
||||
|
||||
extern bool within_kprobe_blacklist(unsigned long addr);
|
||||
|
||||
|
|
|
@ -165,6 +165,13 @@ struct hw_perf_event {
|
|||
struct list_head bp_list;
|
||||
};
|
||||
#endif
|
||||
struct { /* amd_iommu */
|
||||
u8 iommu_bank;
|
||||
u8 iommu_cntr;
|
||||
u16 padding;
|
||||
u64 conf;
|
||||
u64 conf1;
|
||||
};
|
||||
};
|
||||
/*
|
||||
* If the event is a per task event, this will point to the task in
|
||||
|
@ -801,6 +808,7 @@ struct perf_output_handle {
|
|||
struct ring_buffer *rb;
|
||||
unsigned long wakeup;
|
||||
unsigned long size;
|
||||
u64 aux_flags;
|
||||
union {
|
||||
void *addr;
|
||||
unsigned long head;
|
||||
|
@ -849,10 +857,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
|
|||
extern void *perf_aux_output_begin(struct perf_output_handle *handle,
|
||||
struct perf_event *event);
|
||||
extern void perf_aux_output_end(struct perf_output_handle *handle,
|
||||
unsigned long size, bool truncated);
|
||||
unsigned long size);
|
||||
extern int perf_aux_output_skip(struct perf_output_handle *handle,
|
||||
unsigned long size);
|
||||
extern void *perf_get_aux(struct perf_output_handle *handle);
|
||||
extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
|
||||
|
||||
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
|
||||
extern void perf_pmu_unregister(struct pmu *pmu);
|
||||
|
@ -1112,6 +1121,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
|
|||
|
||||
extern void perf_event_exec(void);
|
||||
extern void perf_event_comm(struct task_struct *tsk, bool exec);
|
||||
extern void perf_event_namespaces(struct task_struct *tsk);
|
||||
extern void perf_event_fork(struct task_struct *tsk);
|
||||
|
||||
/* Callchains */
|
||||
|
@ -1267,8 +1277,8 @@ static inline void *
|
|||
perf_aux_output_begin(struct perf_output_handle *handle,
|
||||
struct perf_event *event) { return NULL; }
|
||||
static inline void
|
||||
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
|
||||
bool truncated) { }
|
||||
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
|
||||
{ }
|
||||
static inline int
|
||||
perf_aux_output_skip(struct perf_output_handle *handle,
|
||||
unsigned long size) { return -EINVAL; }
|
||||
|
@ -1315,6 +1325,7 @@ static inline int perf_unregister_guest_info_callbacks
|
|||
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
|
||||
static inline void perf_event_exec(void) { }
|
||||
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
|
||||
static inline void perf_event_namespaces(struct task_struct *tsk) { }
|
||||
static inline void perf_event_fork(struct task_struct *tsk) { }
|
||||
static inline void perf_event_init(void) { }
|
||||
static inline int perf_swevent_get_recursion_context(void) { return -1; }
|
||||
|
|
|
@ -344,7 +344,8 @@ struct perf_event_attr {
|
|||
use_clockid : 1, /* use @clockid for time fields */
|
||||
context_switch : 1, /* context switch data */
|
||||
write_backward : 1, /* Write ring buffer from end to beginning */
|
||||
__reserved_1 : 36;
|
||||
namespaces : 1, /* include namespaces data */
|
||||
__reserved_1 : 35;
|
||||
|
||||
union {
|
||||
__u32 wakeup_events; /* wakeup every n events */
|
||||
|
@ -610,6 +611,23 @@ struct perf_event_header {
|
|||
__u16 size;
|
||||
};
|
||||
|
||||
struct perf_ns_link_info {
|
||||
__u64 dev;
|
||||
__u64 ino;
|
||||
};
|
||||
|
||||
enum {
|
||||
NET_NS_INDEX = 0,
|
||||
UTS_NS_INDEX = 1,
|
||||
IPC_NS_INDEX = 2,
|
||||
PID_NS_INDEX = 3,
|
||||
USER_NS_INDEX = 4,
|
||||
MNT_NS_INDEX = 5,
|
||||
CGROUP_NS_INDEX = 6,
|
||||
|
||||
NR_NAMESPACES, /* number of available namespaces */
|
||||
};
|
||||
|
||||
enum perf_event_type {
|
||||
|
||||
/*
|
||||
|
@ -862,6 +880,18 @@ enum perf_event_type {
|
|||
*/
|
||||
PERF_RECORD_SWITCH_CPU_WIDE = 15,
|
||||
|
||||
/*
|
||||
* struct {
|
||||
* struct perf_event_header header;
|
||||
* u32 pid;
|
||||
* u32 tid;
|
||||
* u64 nr_namespaces;
|
||||
* { u64 dev, inode; } [nr_namespaces];
|
||||
* struct sample_id sample_id;
|
||||
* };
|
||||
*/
|
||||
PERF_RECORD_NAMESPACES = 16,
|
||||
|
||||
PERF_RECORD_MAX, /* non-ABI */
|
||||
};
|
||||
|
||||
|
@ -885,6 +915,7 @@ enum perf_callchain_context {
|
|||
*/
|
||||
#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
|
||||
#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
|
||||
#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
|
||||
|
||||
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
|
||||
#define PERF_FLAG_FD_OUTPUT (1UL << 1)
|
||||
|
|
|
@ -48,6 +48,8 @@
|
|||
#include <linux/parser.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/proc_ns.h>
|
||||
#include <linux/mount.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
|
@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
|
|||
|
||||
static atomic_t nr_mmap_events __read_mostly;
|
||||
static atomic_t nr_comm_events __read_mostly;
|
||||
static atomic_t nr_namespaces_events __read_mostly;
|
||||
static atomic_t nr_task_events __read_mostly;
|
||||
static atomic_t nr_freq_events __read_mostly;
|
||||
static atomic_t nr_switch_events __read_mostly;
|
||||
|
@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
|
|||
atomic_dec(&nr_mmap_events);
|
||||
if (event->attr.comm)
|
||||
atomic_dec(&nr_comm_events);
|
||||
if (event->attr.namespaces)
|
||||
atomic_dec(&nr_namespaces_events);
|
||||
if (event->attr.task)
|
||||
atomic_dec(&nr_task_events);
|
||||
if (event->attr.freq)
|
||||
|
@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
|
|||
void perf_event_fork(struct task_struct *task)
|
||||
{
|
||||
perf_event_task(task, NULL, 1);
|
||||
perf_event_namespaces(task);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
|
|||
perf_event_comm_event(&comm_event);
|
||||
}
|
||||
|
||||
/*
|
||||
* namespaces tracking
|
||||
*/
|
||||
|
||||
struct perf_namespaces_event {
|
||||
struct task_struct *task;
|
||||
|
||||
struct {
|
||||
struct perf_event_header header;
|
||||
|
||||
u32 pid;
|
||||
u32 tid;
|
||||
u64 nr_namespaces;
|
||||
struct perf_ns_link_info link_info[NR_NAMESPACES];
|
||||
} event_id;
|
||||
};
|
||||
|
||||
static int perf_event_namespaces_match(struct perf_event *event)
|
||||
{
|
||||
return event->attr.namespaces;
|
||||
}
|
||||
|
||||
static void perf_event_namespaces_output(struct perf_event *event,
|
||||
void *data)
|
||||
{
|
||||
struct perf_namespaces_event *namespaces_event = data;
|
||||
struct perf_output_handle handle;
|
||||
struct perf_sample_data sample;
|
||||
int ret;
|
||||
|
||||
if (!perf_event_namespaces_match(event))
|
||||
return;
|
||||
|
||||
perf_event_header__init_id(&namespaces_event->event_id.header,
|
||||
&sample, event);
|
||||
ret = perf_output_begin(&handle, event,
|
||||
namespaces_event->event_id.header.size);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
namespaces_event->event_id.pid = perf_event_pid(event,
|
||||
namespaces_event->task);
|
||||
namespaces_event->event_id.tid = perf_event_tid(event,
|
||||
namespaces_event->task);
|
||||
|
||||
perf_output_put(&handle, namespaces_event->event_id);
|
||||
|
||||
perf_event__output_id_sample(event, &handle, &sample);
|
||||
|
||||
perf_output_end(&handle);
|
||||
}
|
||||
|
||||
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
|
||||
struct task_struct *task,
|
||||
const struct proc_ns_operations *ns_ops)
|
||||
{
|
||||
struct path ns_path;
|
||||
struct inode *ns_inode;
|
||||
void *error;
|
||||
|
||||
error = ns_get_path(&ns_path, task, ns_ops);
|
||||
if (!error) {
|
||||
ns_inode = ns_path.dentry->d_inode;
|
||||
ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
|
||||
ns_link_info->ino = ns_inode->i_ino;
|
||||
}
|
||||
}
|
||||
|
||||
void perf_event_namespaces(struct task_struct *task)
|
||||
{
|
||||
struct perf_namespaces_event namespaces_event;
|
||||
struct perf_ns_link_info *ns_link_info;
|
||||
|
||||
if (!atomic_read(&nr_namespaces_events))
|
||||
return;
|
||||
|
||||
namespaces_event = (struct perf_namespaces_event){
|
||||
.task = task,
|
||||
.event_id = {
|
||||
.header = {
|
||||
.type = PERF_RECORD_NAMESPACES,
|
||||
.misc = 0,
|
||||
.size = sizeof(namespaces_event.event_id),
|
||||
},
|
||||
/* .pid */
|
||||
/* .tid */
|
||||
.nr_namespaces = NR_NAMESPACES,
|
||||
/* .link_info[NR_NAMESPACES] */
|
||||
},
|
||||
};
|
||||
|
||||
ns_link_info = namespaces_event.event_id.link_info;
|
||||
|
||||
perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
|
||||
task, &mntns_operations);
|
||||
|
||||
#ifdef CONFIG_USER_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
|
||||
task, &userns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_NET_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
|
||||
task, &netns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_UTS_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
|
||||
task, &utsns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_IPC_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
|
||||
task, &ipcns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_PID_NS
|
||||
perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
|
||||
task, &pidns_operations);
|
||||
#endif
|
||||
#ifdef CONFIG_CGROUPS
|
||||
perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
|
||||
task, &cgroupns_operations);
|
||||
#endif
|
||||
|
||||
perf_iterate_sb(perf_event_namespaces_output,
|
||||
&namespaces_event,
|
||||
NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* mmap tracking
|
||||
*/
|
||||
|
@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
|
|||
atomic_inc(&nr_mmap_events);
|
||||
if (event->attr.comm)
|
||||
atomic_inc(&nr_comm_events);
|
||||
if (event->attr.namespaces)
|
||||
atomic_inc(&nr_namespaces_events);
|
||||
if (event->attr.task)
|
||||
atomic_inc(&nr_task_events);
|
||||
if (event->attr.freq)
|
||||
|
@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
|
|||
return -EACCES;
|
||||
}
|
||||
|
||||
if (attr.namespaces) {
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
if (attr.freq) {
|
||||
if (attr.sample_freq > sysctl_perf_event_sample_rate)
|
||||
return -EINVAL;
|
||||
|
|
|
@ -297,6 +297,19 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
|
|||
rb->paused = 1;
|
||||
}
|
||||
|
||||
void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
|
||||
{
|
||||
/*
|
||||
* OVERWRITE is determined by perf_aux_output_end() and can't
|
||||
* be passed in directly.
|
||||
*/
|
||||
if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE))
|
||||
return;
|
||||
|
||||
handle->aux_flags |= flags;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_aux_output_flag);
|
||||
|
||||
/*
|
||||
* This is called before hardware starts writing to the AUX area to
|
||||
* obtain an output handle and make sure there's room in the buffer.
|
||||
|
@ -360,6 +373,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
|
|||
handle->event = event;
|
||||
handle->head = aux_head;
|
||||
handle->size = 0;
|
||||
handle->aux_flags = 0;
|
||||
|
||||
/*
|
||||
* In overwrite mode, AUX data stores do not depend on aux_tail,
|
||||
|
@ -408,34 +422,32 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
|
|||
* of the AUX buffer management code is that after pmu::stop(), the AUX
|
||||
* transaction must be stopped and therefore drop the AUX reference count.
|
||||
*/
|
||||
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
|
||||
bool truncated)
|
||||
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
|
||||
{
|
||||
bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED);
|
||||
struct ring_buffer *rb = handle->rb;
|
||||
bool wakeup = truncated;
|
||||
unsigned long aux_head;
|
||||
u64 flags = 0;
|
||||
|
||||
if (truncated)
|
||||
flags |= PERF_AUX_FLAG_TRUNCATED;
|
||||
|
||||
/* in overwrite mode, driver provides aux_head via handle */
|
||||
if (rb->aux_overwrite) {
|
||||
flags |= PERF_AUX_FLAG_OVERWRITE;
|
||||
handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE;
|
||||
|
||||
aux_head = handle->head;
|
||||
local_set(&rb->aux_head, aux_head);
|
||||
} else {
|
||||
handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE;
|
||||
|
||||
aux_head = local_read(&rb->aux_head);
|
||||
local_add(size, &rb->aux_head);
|
||||
}
|
||||
|
||||
if (size || flags) {
|
||||
if (size || handle->aux_flags) {
|
||||
/*
|
||||
* Only send RECORD_AUX if we have something useful to communicate
|
||||
*/
|
||||
|
||||
perf_event_aux_event(handle->event, aux_head, size, flags);
|
||||
perf_event_aux_event(handle->event, aux_head, size,
|
||||
handle->aux_flags);
|
||||
}
|
||||
|
||||
aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
|
||||
|
@ -446,7 +458,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
|
|||
}
|
||||
|
||||
if (wakeup) {
|
||||
if (truncated)
|
||||
if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
|
||||
handle->event->pending_disable = 1;
|
||||
perf_output_wakeup(handle);
|
||||
}
|
||||
|
|
|
@ -2353,6 +2353,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
|
|||
}
|
||||
}
|
||||
|
||||
perf_event_namespaces(current);
|
||||
|
||||
bad_unshare_cleanup_cred:
|
||||
if (new_cred)
|
||||
put_cred(new_cred);
|
||||
|
|
|
@ -1391,21 +1391,19 @@ bool within_kprobe_blacklist(unsigned long addr)
|
|||
* This returns encoded errors if it fails to look up symbol or invalid
|
||||
* combination of parameters.
|
||||
*/
|
||||
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
|
||||
static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
|
||||
const char *symbol_name, unsigned int offset)
|
||||
{
|
||||
kprobe_opcode_t *addr = p->addr;
|
||||
|
||||
if ((p->symbol_name && p->addr) ||
|
||||
(!p->symbol_name && !p->addr))
|
||||
if ((symbol_name && addr) || (!symbol_name && !addr))
|
||||
goto invalid;
|
||||
|
||||
if (p->symbol_name) {
|
||||
kprobe_lookup_name(p->symbol_name, addr);
|
||||
if (symbol_name) {
|
||||
kprobe_lookup_name(symbol_name, addr);
|
||||
if (!addr)
|
||||
return ERR_PTR(-ENOENT);
|
||||
}
|
||||
|
||||
addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
|
||||
addr = (kprobe_opcode_t *)(((char *)addr) + offset);
|
||||
if (addr)
|
||||
return addr;
|
||||
|
||||
|
@ -1413,6 +1411,11 @@ static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
|
|||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
|
||||
{
|
||||
return _kprobe_addr(p->addr, p->symbol_name, p->offset);
|
||||
}
|
||||
|
||||
/* Check passed kprobe is valid and return kprobe in kprobe_table. */
|
||||
static struct kprobe *__get_valid_kprobe(struct kprobe *p)
|
||||
{
|
||||
|
@ -1740,11 +1743,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kprobes);
|
||||
|
||||
int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
|
||||
unsigned long val, void *data)
|
||||
int __weak kprobe_exceptions_notify(struct notifier_block *self,
|
||||
unsigned long val, void *data)
|
||||
{
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
|
||||
|
||||
static struct notifier_block kprobe_exceptions_nb = {
|
||||
.notifier_call = kprobe_exceptions_notify,
|
||||
|
@ -1875,6 +1879,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
|
|||
}
|
||||
NOKPROBE_SYMBOL(pre_handler_kretprobe);
|
||||
|
||||
bool __weak arch_function_offset_within_entry(unsigned long offset)
|
||||
{
|
||||
return !offset;
|
||||
}
|
||||
|
||||
bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
|
||||
{
|
||||
kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
|
||||
|
||||
if (IS_ERR(kp_addr))
|
||||
return false;
|
||||
|
||||
if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
|
||||
!arch_function_offset_within_entry(offset))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
int ret = 0;
|
||||
|
@ -1882,6 +1905,9 @@ int register_kretprobe(struct kretprobe *rp)
|
|||
int i;
|
||||
void *addr;
|
||||
|
||||
if (!function_offset_within_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
|
||||
return -EINVAL;
|
||||
|
||||
if (kretprobe_blacklist_size) {
|
||||
addr = kprobe_addr(&rp->kp);
|
||||
if (IS_ERR(addr))
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <linux/file.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
static struct kmem_cache *nsproxy_cachep;
|
||||
|
||||
|
@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
|
|||
goto out;
|
||||
}
|
||||
switch_task_namespaces(tsk, new_nsproxy);
|
||||
|
||||
perf_event_namespaces(tsk);
|
||||
out:
|
||||
fput(file);
|
||||
return err;
|
||||
|
|
|
@ -455,7 +455,7 @@ config UPROBE_EVENTS
|
|||
select UPROBES
|
||||
select PROBE_EVENTS
|
||||
select TRACING
|
||||
default n
|
||||
default y
|
||||
help
|
||||
This allows the user to add tracing events on top of userspace
|
||||
dynamic events (similar to tracepoints) on the fly via the trace
|
||||
|
|
|
@ -4355,6 +4355,7 @@ static const char readme_msg[] =
|
|||
"\t -:[<group>/]<event>\n"
|
||||
#ifdef CONFIG_KPROBE_EVENTS
|
||||
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
|
||||
"place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
|
||||
#endif
|
||||
#ifdef CONFIG_UPROBE_EVENTS
|
||||
"\t place: <path>:<offset>\n"
|
||||
|
|
|
@ -681,10 +681,6 @@ static int create_trace_kprobe(int argc, char **argv)
|
|||
return -EINVAL;
|
||||
}
|
||||
if (isdigit(argv[1][0])) {
|
||||
if (is_return) {
|
||||
pr_info("Return probe point must be a symbol.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
/* an address specified */
|
||||
ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
|
||||
if (ret) {
|
||||
|
@ -700,8 +696,9 @@ static int create_trace_kprobe(int argc, char **argv)
|
|||
pr_info("Failed to parse symbol.\n");
|
||||
return ret;
|
||||
}
|
||||
if (offset && is_return) {
|
||||
pr_info("Return probe must be used without offset.\n");
|
||||
if (offset && is_return &&
|
||||
!function_offset_within_entry(NULL, symbol, offset)) {
|
||||
pr_info("Given offset is not valid for return probe.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -181,10 +181,23 @@ struct kvm_arch_memory_slot {
|
|||
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
|
||||
(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
|
||||
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
|
||||
#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
|
||||
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
|
||||
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
|
||||
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
|
||||
|
||||
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
|
||||
|
||||
/* KVM_IRQ_LINE irq field index values */
|
||||
|
|
|
@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
|
|||
#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
|
||||
#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
|
||||
(0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
|
||||
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
|
||||
#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
|
||||
#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
|
||||
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
|
||||
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
|
||||
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
|
||||
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
|
||||
#define VGIC_LEVEL_INFO_LINE_LEVEL 0
|
||||
|
||||
#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
|
||||
|
||||
/* Device Control API on vcpu fd */
|
||||
|
|
|
@ -413,6 +413,26 @@ struct kvm_get_htab_header {
|
|||
__u16 n_invalid;
|
||||
};
|
||||
|
||||
/* For KVM_PPC_CONFIGURE_V3_MMU */
|
||||
struct kvm_ppc_mmuv3_cfg {
|
||||
__u64 flags;
|
||||
__u64 process_table; /* second doubleword of partition table entry */
|
||||
};
|
||||
|
||||
/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
|
||||
#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */
|
||||
#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */
|
||||
|
||||
/* For KVM_PPC_GET_RMMU_INFO */
|
||||
struct kvm_ppc_rmmu_info {
|
||||
struct kvm_ppc_radix_geom {
|
||||
__u8 page_shift;
|
||||
__u8 level_bits[4];
|
||||
__u8 pad[3];
|
||||
} geometries[8];
|
||||
__u32 ap_encodings[8];
|
||||
};
|
||||
|
||||
/* Per-vcpu XICS interrupt controller state */
|
||||
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
|
||||
|
||||
|
@ -613,5 +633,7 @@ struct kvm_get_htab_header {
|
|||
#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
|
||||
#define KVM_XICS_MASKED (1ULL << 41)
|
||||
#define KVM_XICS_PENDING (1ULL << 42)
|
||||
#define KVM_XICS_PRESENTED (1ULL << 43)
|
||||
#define KVM_XICS_QUEUED (1ULL << 44)
|
||||
|
||||
#endif /* __LINUX_KVM_POWERPC_H */
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
|
||||
#define LOCK_PREFIX "\n\tlock; "
|
||||
|
||||
#include <asm/cmpxchg.h>
|
||||
|
||||
/*
|
||||
* Atomic operations that C can't guarantee us. Useful for
|
||||
* resource counting etc..
|
||||
|
@ -62,4 +64,9 @@ static inline int atomic_dec_and_test(atomic_t *v)
|
|||
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
|
||||
}
|
||||
|
||||
static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
|
||||
{
|
||||
return cmpxchg(&v->counter, old, new);
|
||||
}
|
||||
|
||||
#endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
#ifndef TOOLS_ASM_X86_CMPXCHG_H
|
||||
#define TOOLS_ASM_X86_CMPXCHG_H
|
||||
|
||||
#include <linux/compiler.h>
|
||||
|
||||
/*
|
||||
* Non-existant functions to indicate usage errors at link time
|
||||
* (or compile-time if the compiler implements __compiletime_error().
|
||||
*/
|
||||
extern void __cmpxchg_wrong_size(void)
|
||||
__compiletime_error("Bad argument size for cmpxchg");
|
||||
|
||||
/*
|
||||
* Constants for operation sizes. On 32-bit, the 64-bit size it set to
|
||||
* -1 because sizeof will never return -1, thereby making those switch
|
||||
* case statements guaranteeed dead code which the compiler will
|
||||
* eliminate, and allowing the "missing symbol in the default case" to
|
||||
* indicate a usage error.
|
||||
*/
|
||||
#define __X86_CASE_B 1
|
||||
#define __X86_CASE_W 2
|
||||
#define __X86_CASE_L 4
|
||||
#ifdef __x86_64__
|
||||
#define __X86_CASE_Q 8
|
||||
#else
|
||||
#define __X86_CASE_Q -1 /* sizeof will never return -1 */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Atomic compare and exchange. Compare OLD with MEM, if identical,
|
||||
* store NEW in MEM. Return the initial value in MEM. Success is
|
||||
* indicated by comparing RETURN with OLD.
|
||||
*/
|
||||
#define __raw_cmpxchg(ptr, old, new, size, lock) \
|
||||
({ \
|
||||
__typeof__(*(ptr)) __ret; \
|
||||
__typeof__(*(ptr)) __old = (old); \
|
||||
__typeof__(*(ptr)) __new = (new); \
|
||||
switch (size) { \
|
||||
case __X86_CASE_B: \
|
||||
{ \
|
||||
volatile u8 *__ptr = (volatile u8 *)(ptr); \
|
||||
asm volatile(lock "cmpxchgb %2,%1" \
|
||||
: "=a" (__ret), "+m" (*__ptr) \
|
||||
: "q" (__new), "0" (__old) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
} \
|
||||
case __X86_CASE_W: \
|
||||
{ \
|
||||
volatile u16 *__ptr = (volatile u16 *)(ptr); \
|
||||
asm volatile(lock "cmpxchgw %2,%1" \
|
||||
: "=a" (__ret), "+m" (*__ptr) \
|
||||
: "r" (__new), "0" (__old) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
} \
|
||||
case __X86_CASE_L: \
|
||||
{ \
|
||||
volatile u32 *__ptr = (volatile u32 *)(ptr); \
|
||||
asm volatile(lock "cmpxchgl %2,%1" \
|
||||
: "=a" (__ret), "+m" (*__ptr) \
|
||||
: "r" (__new), "0" (__old) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
} \
|
||||
case __X86_CASE_Q: \
|
||||
{ \
|
||||
volatile u64 *__ptr = (volatile u64 *)(ptr); \
|
||||
asm volatile(lock "cmpxchgq %2,%1" \
|
||||
: "=a" (__ret), "+m" (*__ptr) \
|
||||
: "r" (__new), "0" (__old) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
} \
|
||||
default: \
|
||||
__cmpxchg_wrong_size(); \
|
||||
} \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define __cmpxchg(ptr, old, new, size) \
|
||||
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
|
||||
|
||||
#define cmpxchg(ptr, old, new) \
|
||||
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
|
||||
|
||||
|
||||
#endif /* TOOLS_ASM_X86_CMPXCHG_H */
|
|
@ -100,7 +100,7 @@
|
|||
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
|
||||
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
|
||||
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
|
||||
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
|
||||
#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
|
||||
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
|
||||
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
|
||||
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
|
||||
|
@ -186,7 +186,8 @@
|
|||
*
|
||||
* Reuse free bits when adding new feature flags!
|
||||
*/
|
||||
|
||||
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
|
||||
#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
|
||||
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
|
||||
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
|
||||
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
|
||||
|
@ -289,7 +290,8 @@
|
|||
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
|
||||
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
|
||||
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
|
||||
#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */
|
||||
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
|
||||
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
|
||||
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
|
||||
|
@ -321,5 +323,4 @@
|
|||
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
|
||||
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
|
||||
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
|
||||
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||
|
|
|
@ -286,7 +286,7 @@ ENDPROC(memcpy_mcsafe_unrolled)
|
|||
_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
|
||||
_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
|
||||
|
|
|
@ -63,6 +63,7 @@ FEATURE_TESTS_BASIC := \
|
|||
lzma \
|
||||
get_cpuid \
|
||||
bpf \
|
||||
sched_getcpu \
|
||||
sdt
|
||||
|
||||
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
|
||||
|
|
|
@ -48,21 +48,22 @@ FILES= \
|
|||
test-get_cpuid.bin \
|
||||
test-sdt.bin \
|
||||
test-cxx.bin \
|
||||
test-jvmti.bin
|
||||
test-jvmti.bin \
|
||||
test-sched_getcpu.bin
|
||||
|
||||
FILES := $(addprefix $(OUTPUT),$(FILES))
|
||||
|
||||
CC := $(CROSS_COMPILE)gcc -MD
|
||||
CXX := $(CROSS_COMPILE)g++ -MD
|
||||
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
|
||||
CC ?= $(CROSS_COMPILE)gcc
|
||||
CXX ?= $(CROSS_COMPILE)g++
|
||||
PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
|
||||
LLVM_CONFIG ?= llvm-config
|
||||
|
||||
all: $(FILES)
|
||||
|
||||
__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
|
||||
__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
|
||||
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
|
||||
|
||||
__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
|
||||
__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
|
||||
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
|
||||
|
||||
###############################
|
||||
|
@ -91,6 +92,9 @@ $(OUTPUT)test-libelf.bin:
|
|||
$(OUTPUT)test-glibc.bin:
|
||||
$(BUILD)
|
||||
|
||||
$(OUTPUT)test-sched_getcpu.bin:
|
||||
$(BUILD)
|
||||
|
||||
DWARFLIBS := -ldw
|
||||
ifeq ($(findstring -static,${LDFLAGS}),-static)
|
||||
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
|
||||
|
@ -171,7 +175,7 @@ $(OUTPUT)test-libperl.bin:
|
|||
$(BUILD) $(FLAGS_PERL_EMBED)
|
||||
|
||||
$(OUTPUT)test-libpython.bin:
|
||||
$(BUILD)
|
||||
$(BUILD) $(FLAGS_PYTHON_EMBED)
|
||||
|
||||
$(OUTPUT)test-libpython-version.bin:
|
||||
$(BUILD)
|
||||
|
|
|
@ -117,6 +117,10 @@
|
|||
# include "test-pthread-attr-setaffinity-np.c"
|
||||
#undef main
|
||||
|
||||
#define main main_test_sched_getcpu
|
||||
# include "test-sched_getcpu.c"
|
||||
#undef main
|
||||
|
||||
# if 0
|
||||
/*
|
||||
* Disable libbabeltrace check for test-all, because the requested
|
||||
|
@ -182,6 +186,7 @@ int main(int argc, char *argv[])
|
|||
main_test_get_cpuid();
|
||||
main_test_bpf();
|
||||
main_test_libcrypto();
|
||||
main_test_sched_getcpu();
|
||||
main_test_sdt();
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
return sched_getcpu();
|
||||
}
|
|
@ -60,4 +60,12 @@ static inline int atomic_dec_and_test(atomic_t *v)
|
|||
return __sync_sub_and_fetch(&v->counter, 1) == 0;
|
||||
}
|
||||
|
||||
#define cmpxchg(ptr, oldval, newval) \
|
||||
__sync_val_compare_and_swap(ptr, oldval, newval)
|
||||
|
||||
static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval)
|
||||
{
|
||||
return cmpxchg(&(v)->counter, oldval, newval);
|
||||
}
|
||||
|
||||
#endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
|
||||
|
|
|
@ -3,4 +3,10 @@
|
|||
|
||||
#include <asm/atomic.h>
|
||||
|
||||
/* atomic_cmpxchg_relaxed */
|
||||
#ifndef atomic_cmpxchg_relaxed
|
||||
#define atomic_cmpxchg_relaxed atomic_cmpxchg
|
||||
#define atomic_cmpxchg_release atomic_cmpxchg
|
||||
#endif /* atomic_cmpxchg_relaxed */
|
||||
|
||||
#endif /* __TOOLS_LINUX_ATOMIC_H */
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
#ifndef _TOOLS_PERF_LINUX_BUG_H
|
||||
#define _TOOLS_PERF_LINUX_BUG_H
|
||||
|
||||
/* Force a compilation error if condition is true, but also produce a
|
||||
result (of value 0 and type size_t), so the expression can be used
|
||||
e.g. in a structure initializer (or where-ever else comma expressions
|
||||
aren't permitted). */
|
||||
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
|
||||
|
||||
#endif /* _TOOLS_PERF_LINUX_BUG_H */
|
|
@ -12,3 +12,10 @@
|
|||
#if GCC_VERSION >= 70000 && !defined(__CHECKER__)
|
||||
# define __fallthrough __attribute__ ((fallthrough))
|
||||
#endif
|
||||
|
||||
#if GCC_VERSION >= 40300
|
||||
# define __compiletime_error(message) __attribute__((error(message)))
|
||||
#endif /* GCC_VERSION >= 40300 */
|
||||
|
||||
/* &a[0] degrades to a pointer: a different type from an array */
|
||||
#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
|
||||
|
|
|
@ -5,6 +5,10 @@
|
|||
#include <linux/compiler-gcc.h>
|
||||
#endif
|
||||
|
||||
#ifndef __compiletime_error
|
||||
# define __compiletime_error(message)
|
||||
#endif
|
||||
|
||||
/* Optimization barrier */
|
||||
/* The "volatile" is due to gcc bugs */
|
||||
#define barrier() __asm__ __volatile__("": : :"memory")
|
||||
|
@ -13,6 +17,11 @@
|
|||
# define __always_inline inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
/* Are two types/vars the same type (ignoring qualifiers)? */
|
||||
#ifndef __same_type
|
||||
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
|
||||
#endif
|
||||
|
||||
#ifdef __ANDROID__
|
||||
/*
|
||||
* FIXME: Big hammer to get rid of tons of:
|
||||
|
|
|
@ -13,10 +13,6 @@
|
|||
#include <linux/hash.h>
|
||||
#include <linux/log2.h>
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
#endif
|
||||
|
||||
#define DEFINE_HASHTABLE(name, bits) \
|
||||
struct hlist_head name[1 << (bits)] = \
|
||||
{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
|
||||
|
|
|
@ -4,6 +4,11 @@
|
|||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
#include <assert.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
#ifndef UINT_MAX
|
||||
#define UINT_MAX (~0U)
|
||||
#endif
|
||||
|
||||
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
|
||||
|
||||
|
@ -72,6 +77,8 @@
|
|||
int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
|
||||
int scnprintf(char * buf, size_t size, const char * fmt, ...);
|
||||
|
||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
|
||||
|
||||
/*
|
||||
* This looks more complex than it should be. But we need to
|
||||
* get the type for the ~ right in round_down (it needs to be
|
||||
|
|
|
@ -12,6 +12,9 @@
|
|||
#ifndef _TOOLS_LINUX_LOG2_H
|
||||
#define _TOOLS_LINUX_LOG2_H
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* non-constant log of base 2 calculators
|
||||
* - the arch may override these in asm/bitops.h if they can be implemented
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
#ifndef _TOOLS_LINUX_REFCOUNT_H
|
||||
#define _TOOLS_LINUX_REFCOUNT_H
|
||||
|
||||
/*
|
||||
* Variant of atomic_t specialized for reference counts.
|
||||
*
|
||||
* The interface matches the atomic_t interface (to aid in porting) but only
|
||||
* provides the few functions one should use for reference counting.
|
||||
*
|
||||
* It differs in that the counter saturates at UINT_MAX and will not move once
|
||||
* there. This avoids wrapping the counter and causing 'spurious'
|
||||
* use-after-free issues.
|
||||
*
|
||||
* Memory ordering rules are slightly relaxed wrt regular atomic_t functions
|
||||
* and provide only what is strictly required for refcounts.
|
||||
*
|
||||
* The increments are fully relaxed; these will not provide ordering. The
|
||||
* rationale is that whatever is used to obtain the object we're increasing the
|
||||
* reference count on will provide the ordering. For locked data structures,
|
||||
* its the lock acquire, for RCU/lockless data structures its the dependent
|
||||
* load.
|
||||
*
|
||||
* Do note that inc_not_zero() provides a control dependency which will order
|
||||
* future stores against the inc, this ensures we'll never modify the object
|
||||
* if we did not in fact acquire a reference.
|
||||
*
|
||||
* The decrements will provide release order, such that all the prior loads and
|
||||
* stores will be issued before, it also provides a control dependency, which
|
||||
* will order us against the subsequent free().
|
||||
*
|
||||
* The control dependency is against the load of the cmpxchg (ll/sc) that
|
||||
* succeeded. This means the stores aren't fully ordered, but this is fine
|
||||
* because the 1->0 transition indicates no concurrency.
|
||||
*
|
||||
* Note that the allocator is responsible for ordering things between free()
|
||||
* and alloc().
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define REFCOUNT_WARN(cond, str) (void)(cond)
|
||||
#define __refcount_check
|
||||
#else
|
||||
#define REFCOUNT_WARN(cond, str) BUG_ON(cond)
|
||||
#define __refcount_check __must_check
|
||||
#endif
|
||||
|
||||
typedef struct refcount_struct {
|
||||
atomic_t refs;
|
||||
} refcount_t;
|
||||
|
||||
#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
|
||||
|
||||
static inline void refcount_set(refcount_t *r, unsigned int n)
|
||||
{
|
||||
atomic_set(&r->refs, n);
|
||||
}
|
||||
|
||||
static inline unsigned int refcount_read(const refcount_t *r)
|
||||
{
|
||||
return atomic_read(&r->refs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller has guaranteed the
|
||||
* object memory to be stable (RCU, etc.). It does provide a control dependency
|
||||
* and thereby orders future stores. See the comment on top.
|
||||
*/
|
||||
static inline __refcount_check
|
||||
bool refcount_inc_not_zero(refcount_t *r)
|
||||
{
|
||||
unsigned int old, new, val = atomic_read(&r->refs);
|
||||
|
||||
for (;;) {
|
||||
new = val + 1;
|
||||
|
||||
if (!val)
|
||||
return false;
|
||||
|
||||
if (unlikely(!new))
|
||||
return true;
|
||||
|
||||
old = atomic_cmpxchg_relaxed(&r->refs, val, new);
|
||||
if (old == val)
|
||||
break;
|
||||
|
||||
val = old;
|
||||
}
|
||||
|
||||
REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
|
||||
*
|
||||
* Provides no memory ordering, it is assumed the caller already has a
|
||||
* reference on the object, will WARN when this is not so.
|
||||
*/
|
||||
static inline void refcount_inc(refcount_t *r)
|
||||
{
|
||||
REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
|
||||
* decrement when saturated at UINT_MAX.
|
||||
*
|
||||
* Provides release memory ordering, such that prior loads and stores are done
|
||||
* before, and provides a control dependency such that free() must come after.
|
||||
* See the comment on top.
|
||||
*/
|
||||
static inline __refcount_check
|
||||
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
|
||||
{
|
||||
unsigned int old, new, val = atomic_read(&r->refs);
|
||||
|
||||
for (;;) {
|
||||
if (unlikely(val == UINT_MAX))
|
||||
return false;
|
||||
|
||||
new = val - i;
|
||||
if (new > val) {
|
||||
REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
old = atomic_cmpxchg_release(&r->refs, val, new);
|
||||
if (old == val)
|
||||
break;
|
||||
|
||||
val = old;
|
||||
}
|
||||
|
||||
return !new;
|
||||
}
|
||||
|
||||
static inline __refcount_check
|
||||
bool refcount_dec_and_test(refcount_t *r)
|
||||
{
|
||||
return refcount_sub_and_test(1, r);
|
||||
}
|
||||
|
||||
|
||||
#endif /* _ATOMIC_LINUX_REFCOUNT_H */
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
|
||||
#include <asm/types.h>
|
||||
#include <asm/posix_types.h>
|
||||
|
||||
struct page;
|
||||
struct kmem_cache;
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
#ifndef _UAPI_LINUX_FCNTL_H
|
||||
#define _UAPI_LINUX_FCNTL_H
|
||||
|
||||
#include <asm/fcntl.h>
|
||||
|
||||
#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
|
||||
#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
|
||||
|
||||
/*
|
||||
* Cancel a blocking posix lock; internal use only until we expose an
|
||||
* asynchronous lock api to userspace:
|
||||
*/
|
||||
#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5)
|
||||
|
||||
/* Create a file descriptor with FD_CLOEXEC set. */
|
||||
#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
|
||||
|
||||
/*
|
||||
* Request nofications on a directory.
|
||||
* See below for events that may be notified.
|
||||
*/
|
||||
#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2)
|
||||
|
||||
/*
|
||||
* Set and get of pipe page size array
|
||||
*/
|
||||
#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
|
||||
#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
|
||||
|
||||
/*
|
||||
* Set/Get seals
|
||||
*/
|
||||
#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
|
||||
#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
|
||||
|
||||
/*
|
||||
* Types of seals
|
||||
*/
|
||||
#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
|
||||
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
|
||||
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
|
||||
#define F_SEAL_WRITE 0x0008 /* prevent writes */
|
||||
/* (1U << 31) is reserved for signed error codes */
|
||||
|
||||
/*
|
||||
* Types of directory notifications that may be requested.
|
||||
*/
|
||||
#define DN_ACCESS 0x00000001 /* File accessed */
|
||||
#define DN_MODIFY 0x00000002 /* File modified */
|
||||
#define DN_CREATE 0x00000004 /* File created */
|
||||
#define DN_DELETE 0x00000008 /* File removed */
|
||||
#define DN_RENAME 0x00000010 /* File renamed */
|
||||
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
|
||||
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
|
||||
|
||||
#define AT_FDCWD -100 /* Special value used to indicate
|
||||
openat should use the current
|
||||
working directory. */
|
||||
#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
|
||||
#define AT_REMOVEDIR 0x200 /* Remove directory instead of
|
||||
unlinking file. */
|
||||
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
|
||||
#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
|
||||
#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
|
||||
|
||||
#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
|
||||
#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
|
||||
#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
|
||||
#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
|
||||
|
||||
|
||||
#endif /* _UAPI_LINUX_FCNTL_H */
|
|
@ -344,7 +344,8 @@ struct perf_event_attr {
|
|||
use_clockid : 1, /* use @clockid for time fields */
|
||||
context_switch : 1, /* context switch data */
|
||||
write_backward : 1, /* Write ring buffer from end to beginning */
|
||||
__reserved_1 : 36;
|
||||
namespaces : 1, /* include namespaces data */
|
||||
__reserved_1 : 35;
|
||||
|
||||
union {
|
||||
__u32 wakeup_events; /* wakeup every n events */
|
||||
|
@ -610,6 +611,23 @@ struct perf_event_header {
|
|||
__u16 size;
|
||||
};
|
||||
|
||||
struct perf_ns_link_info {
|
||||
__u64 dev;
|
||||
__u64 ino;
|
||||
};
|
||||
|
||||
enum {
|
||||
NET_NS_INDEX = 0,
|
||||
UTS_NS_INDEX = 1,
|
||||
IPC_NS_INDEX = 2,
|
||||
PID_NS_INDEX = 3,
|
||||
USER_NS_INDEX = 4,
|
||||
MNT_NS_INDEX = 5,
|
||||
CGROUP_NS_INDEX = 6,
|
||||
|
||||
NR_NAMESPACES, /* number of available namespaces */
|
||||
};
|
||||
|
||||
enum perf_event_type {
|
||||
|
||||
/*
|
||||
|
@ -862,6 +880,18 @@ enum perf_event_type {
|
|||
*/
|
||||
PERF_RECORD_SWITCH_CPU_WIDE = 15,
|
||||
|
||||
/*
|
||||
* struct {
|
||||
* struct perf_event_header header;
|
||||
* u32 pid;
|
||||
* u32 tid;
|
||||
* u64 nr_namespaces;
|
||||
* { u64 dev, inode; } [nr_namespaces];
|
||||
* struct sample_id sample_id;
|
||||
* };
|
||||
*/
|
||||
PERF_RECORD_NAMESPACES = 16,
|
||||
|
||||
PERF_RECORD_MAX, /* non-ABI */
|
||||
};
|
||||
|
||||
|
@ -885,6 +915,7 @@ enum perf_callchain_context {
|
|||
*/
|
||||
#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
|
||||
#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
|
||||
#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
|
||||
|
||||
#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
|
||||
#define PERF_FLAG_FD_OUTPUT (1UL << 1)
|
||||
|
|
|
@ -0,0 +1,177 @@
|
|||
#ifndef _UAPI_LINUX_STAT_H
|
||||
#define _UAPI_LINUX_STAT_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
|
||||
|
||||
#define S_IFMT 00170000
|
||||
#define S_IFSOCK 0140000
|
||||
#define S_IFLNK 0120000
|
||||
#define S_IFREG 0100000
|
||||
#define S_IFBLK 0060000
|
||||
#define S_IFDIR 0040000
|
||||
#define S_IFCHR 0020000
|
||||
#define S_IFIFO 0010000
|
||||
#define S_ISUID 0004000
|
||||
#define S_ISGID 0002000
|
||||
#define S_ISVTX 0001000
|
||||
|
||||
#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
|
||||
#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
|
||||
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
|
||||
#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
|
||||
#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
|
||||
#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
|
||||
#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
|
||||
|
||||
#define S_IRWXU 00700
|
||||
#define S_IRUSR 00400
|
||||
#define S_IWUSR 00200
|
||||
#define S_IXUSR 00100
|
||||
|
||||
#define S_IRWXG 00070
|
||||
#define S_IRGRP 00040
|
||||
#define S_IWGRP 00020
|
||||
#define S_IXGRP 00010
|
||||
|
||||
#define S_IRWXO 00007
|
||||
#define S_IROTH 00004
|
||||
#define S_IWOTH 00002
|
||||
#define S_IXOTH 00001
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Timestamp structure for the timestamps in struct statx.
|
||||
*
|
||||
* tv_sec holds the number of seconds before (negative) or after (positive)
|
||||
* 00:00:00 1st January 1970 UTC.
|
||||
*
|
||||
* tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is
|
||||
* negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time.
|
||||
*
|
||||
* Note that if both tv_sec and tv_nsec are non-zero, then the two values must
|
||||
* either be both positive or both negative.
|
||||
*
|
||||
* __reserved is held in case we need a yet finer resolution.
|
||||
*/
|
||||
struct statx_timestamp {
|
||||
__s64 tv_sec;
|
||||
__s32 tv_nsec;
|
||||
__s32 __reserved;
|
||||
};
|
||||
|
||||
/*
|
||||
* Structures for the extended file attribute retrieval system call
|
||||
* (statx()).
|
||||
*
|
||||
* The caller passes a mask of what they're specifically interested in as a
|
||||
* parameter to statx(). What statx() actually got will be indicated in
|
||||
* st_mask upon return.
|
||||
*
|
||||
* For each bit in the mask argument:
|
||||
*
|
||||
* - if the datum is not supported:
|
||||
*
|
||||
* - the bit will be cleared, and
|
||||
*
|
||||
* - the datum will be set to an appropriate fabricated value if one is
|
||||
* available (eg. CIFS can take a default uid and gid), otherwise
|
||||
*
|
||||
* - the field will be cleared;
|
||||
*
|
||||
* - otherwise, if explicitly requested:
|
||||
*
|
||||
* - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
|
||||
* set or if the datum is considered out of date, and
|
||||
*
|
||||
* - the field will be filled in and the bit will be set;
|
||||
*
|
||||
* - otherwise, if not requested, but available in approximate form without any
|
||||
* effort, it will be filled in anyway, and the bit will be set upon return
|
||||
* (it might not be up to date, however, and no attempt will be made to
|
||||
* synchronise the internal state first);
|
||||
*
|
||||
* - otherwise the field and the bit will be cleared before returning.
|
||||
*
|
||||
* Items in STATX_BASIC_STATS may be marked unavailable on return, but they
|
||||
* will have values installed for compatibility purposes so that stat() and
|
||||
* co. can be emulated in userspace.
|
||||
*/
|
||||
struct statx {
|
||||
/* 0x00 */
|
||||
__u32 stx_mask; /* What results were written [uncond] */
|
||||
__u32 stx_blksize; /* Preferred general I/O size [uncond] */
|
||||
__u64 stx_attributes; /* Flags conveying information about the file [uncond] */
|
||||
/* 0x10 */
|
||||
__u32 stx_nlink; /* Number of hard links */
|
||||
__u32 stx_uid; /* User ID of owner */
|
||||
__u32 stx_gid; /* Group ID of owner */
|
||||
__u16 stx_mode; /* File mode */
|
||||
__u16 __spare0[1];
|
||||
/* 0x20 */
|
||||
__u64 stx_ino; /* Inode number */
|
||||
__u64 stx_size; /* File size */
|
||||
__u64 stx_blocks; /* Number of 512-byte blocks allocated */
|
||||
__u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
|
||||
/* 0x40 */
|
||||
struct statx_timestamp stx_atime; /* Last access time */
|
||||
struct statx_timestamp stx_btime; /* File creation time */
|
||||
struct statx_timestamp stx_ctime; /* Last attribute change time */
|
||||
struct statx_timestamp stx_mtime; /* Last data modification time */
|
||||
/* 0x80 */
|
||||
__u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
|
||||
__u32 stx_rdev_minor;
|
||||
__u32 stx_dev_major; /* ID of device containing file [uncond] */
|
||||
__u32 stx_dev_minor;
|
||||
/* 0x90 */
|
||||
__u64 __spare2[14]; /* Spare space for future expansion */
|
||||
/* 0x100 */
|
||||
};
|
||||
|
||||
/*
|
||||
* Flags to be stx_mask
|
||||
*
|
||||
* Query request/result mask for statx() and struct statx::stx_mask.
|
||||
*
|
||||
* These bits should be set in the mask argument of statx() to request
|
||||
* particular items when calling statx().
|
||||
*/
|
||||
#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */
|
||||
#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */
|
||||
#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */
|
||||
#define STATX_UID 0x00000008U /* Want/got stx_uid */
|
||||
#define STATX_GID 0x00000010U /* Want/got stx_gid */
|
||||
#define STATX_ATIME 0x00000020U /* Want/got stx_atime */
|
||||
#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */
|
||||
#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */
|
||||
#define STATX_INO 0x00000100U /* Want/got stx_ino */
|
||||
#define STATX_SIZE 0x00000200U /* Want/got stx_size */
|
||||
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
|
||||
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
|
||||
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
|
||||
#define STATX_ALL 0x00000fffU /* All currently supported flags */
|
||||
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
|
||||
|
||||
/*
|
||||
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
|
||||
*
|
||||
* These give information about the features or the state of a file that might
|
||||
* be of use to ordinary userspace programs such as GUIs or ls rather than
|
||||
* specialised tools.
|
||||
*
|
||||
* Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
|
||||
* semantically. Where possible, the numerical value is picked to correspond
|
||||
* also.
|
||||
*/
|
||||
#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */
|
||||
#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */
|
||||
#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
|
||||
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
|
||||
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
|
||||
|
||||
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
|
||||
|
||||
|
||||
#endif /* _UAPI_LINUX_STAT_H */
|
|
@ -439,6 +439,35 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
|
|||
return filename__read_str(path, buf, sizep);
|
||||
}
|
||||
|
||||
int sysfs__read_bool(const char *entry, bool *value)
|
||||
{
|
||||
char *buf;
|
||||
size_t size;
|
||||
int ret;
|
||||
|
||||
ret = sysfs__read_str(entry, &buf, &size);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
switch (buf[0]) {
|
||||
case '1':
|
||||
case 'y':
|
||||
case 'Y':
|
||||
*value = true;
|
||||
break;
|
||||
case '0':
|
||||
case 'n':
|
||||
case 'N':
|
||||
*value = false;
|
||||
break;
|
||||
default:
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
free(buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
int sysctl__read_int(const char *sysctl, int *value)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
|
|
|
@ -37,4 +37,5 @@ int sysctl__read_int(const char *sysctl, int *value);
|
|||
int sysfs__read_int(const char *entry, int *value);
|
||||
int sysfs__read_ull(const char *entry, unsigned long long *value);
|
||||
int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
|
||||
int sysfs__read_bool(const char *entry, bool *value);
|
||||
#endif /* __API_FS__ */
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#define __SUBCMD_HELP_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
|
||||
struct cmdnames {
|
||||
size_t alloc;
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include <ctype.h>
|
||||
#include "symbol/kallsyms.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
|
|
@ -36,8 +36,7 @@
|
|||
#include "warn.h"
|
||||
|
||||
#include <linux/hashtable.h>
|
||||
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#define STATE_FP_SAVED 0x1
|
||||
#define STATE_FP_SETUP 0x2
|
||||
|
|
|
@ -31,11 +31,10 @@
|
|||
#include <stdlib.h>
|
||||
#include <subcmd/exec-cmd.h>
|
||||
#include <subcmd/pager.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include "builtin.h"
|
||||
|
||||
#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
|
||||
|
||||
struct cmd_struct {
|
||||
const char *name;
|
||||
int (*fn)(int, const char **);
|
||||
|
|
|
@ -31,3 +31,5 @@ config.mak.autogen
|
|||
.config-detected
|
||||
util/intel-pt-decoder/inat-tables.c
|
||||
arch/*/include/generated/
|
||||
pmu-events/pmu-events.c
|
||||
pmu-events/jevents
|
||||
|
|
|
@ -50,5 +50,6 @@ libperf-y += util/
|
|||
libperf-y += arch/
|
||||
libperf-y += ui/
|
||||
libperf-y += scripts/
|
||||
libperf-y += trace/beauty/
|
||||
|
||||
gtk-y += ui/gtk/
|
||||
|
|
|
@ -30,6 +30,24 @@ OPTIONS
|
|||
--verbose=::
|
||||
Verbosity level.
|
||||
|
||||
-p::
|
||||
--pid=::
|
||||
Trace on existing process id (comma separated list).
|
||||
|
||||
-a::
|
||||
--all-cpus::
|
||||
Force system-wide collection. Scripts run without a <command>
|
||||
normally use -a by default, while scripts run with a <command>
|
||||
normally don't - this option allows the latter to be run in
|
||||
system-wide mode.
|
||||
|
||||
-C::
|
||||
--cpu=::
|
||||
Only trace for the list of CPUs provided. Multiple CPUs can
|
||||
be provided as a comma separated list with no space like: 0,1.
|
||||
Ranges of CPUs are specified with -: 0-2.
|
||||
Default is to trace on all online CPUs.
|
||||
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
|
|
|
@ -8,7 +8,7 @@ perf-list - List all symbolic event types
|
|||
SYNOPSIS
|
||||
--------
|
||||
[verse]
|
||||
'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob]
|
||||
'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
|
@ -24,6 +24,10 @@ Don't print descriptions.
|
|||
--long-desc::
|
||||
Print longer event descriptions.
|
||||
|
||||
--details::
|
||||
Print how named events are resolved internally into perf events, and also
|
||||
any extra expressions computed by perf stat.
|
||||
|
||||
|
||||
[[EVENT_MODIFIERS]]
|
||||
EVENT MODIFIERS
|
||||
|
@ -240,6 +244,8 @@ To limit the list use:
|
|||
|
||||
. 'pmu' to print the kernel supplied PMU events.
|
||||
|
||||
. 'sdt' to list all Statically Defined Tracepoint events.
|
||||
|
||||
. If none of the above is matched, it will apply the supplied glob to all
|
||||
events, printing the ones that match.
|
||||
|
||||
|
|
|
@ -347,6 +347,9 @@ Enable weightened sampling. An additional weight is recorded per sample and can
|
|||
displayed with the weight and local_weight sort keys. This currently works for TSX
|
||||
abort events and some memory events in precise mode on modern Intel CPUs.
|
||||
|
||||
--namespaces::
|
||||
Record events of type PERF_RECORD_NAMESPACES.
|
||||
|
||||
--transaction::
|
||||
Record transaction flags for transaction related events.
|
||||
|
||||
|
|
|
@ -72,7 +72,8 @@ OPTIONS
|
|||
--sort=::
|
||||
Sort histogram entries by given key(s) - multiple keys can be specified
|
||||
in CSV format. Following sort keys are available:
|
||||
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
|
||||
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
|
||||
local_weight, cgroup_id.
|
||||
|
||||
Each key has following meaning:
|
||||
|
||||
|
@ -80,6 +81,7 @@ OPTIONS
|
|||
- pid: command and tid of the task
|
||||
- dso: name of library or module executed at the time of sample
|
||||
- symbol: name of function executed at the time of sample
|
||||
- symbol_size: size of function executed at the time of sample
|
||||
- parent: name of function matched to the parent regex filter. Unmatched
|
||||
entries are displayed as "[other]".
|
||||
- cpu: cpu number the task ran at the time of sample
|
||||
|
@ -91,6 +93,7 @@ OPTIONS
|
|||
- weight: Event specific weight, e.g. memory latency or transaction
|
||||
abort cost. This is the global weight.
|
||||
- local_weight: Local weight version of the weight above.
|
||||
- cgroup_id: ID derived from cgroup namespace device and inode numbers.
|
||||
- transaction: Transaction abort flags.
|
||||
- overhead: Overhead percentage of sample
|
||||
- overhead_sys: Overhead percentage of sample running in system mode
|
||||
|
@ -172,6 +175,9 @@ OPTIONS
|
|||
By default, every sort keys not specified in -F will be appended
|
||||
automatically.
|
||||
|
||||
If the keys starts with a prefix '+', then it will append the specified
|
||||
field(s) to the default field order. For example: perf report -F +period,sample.
|
||||
|
||||
-p::
|
||||
--parent=<regex>::
|
||||
A regex filter to identify parent. The parent is a caller of this
|
||||
|
@ -229,6 +235,7 @@ OPTIONS
|
|||
sort_key can be:
|
||||
- function: compare on functions (default)
|
||||
- address: compare on individual code addresses
|
||||
- srcline: compare on source filename and line number
|
||||
|
||||
branch can be:
|
||||
- branch: include last branch information in callgraph when available.
|
||||
|
@ -424,6 +431,10 @@ include::itrace.txt[]
|
|||
--hierarchy::
|
||||
Enable hierarchical output.
|
||||
|
||||
--inline::
|
||||
If a callgraph address belongs to an inlined function, the inline stack
|
||||
will be printed. Each entry is function name or file/line.
|
||||
|
||||
include::callchain-overhead-calculation.txt[]
|
||||
|
||||
SEE ALSO
|
||||
|
|
|
@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist'
|
|||
--migrations::
|
||||
Show migration events.
|
||||
|
||||
-n::
|
||||
--next::
|
||||
Show next task.
|
||||
|
||||
-I::
|
||||
--idle-hist::
|
||||
Show idle-related events only.
|
||||
|
|
|
@ -116,7 +116,7 @@ OPTIONS
|
|||
--fields::
|
||||
Comma separated list of fields to print. Options are:
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
|
||||
srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
|
||||
srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
|
||||
callindent, insn, insnlen. Field list can be prepended with the type, trace, sw or hw,
|
||||
to indicate to which event type the field list applies.
|
||||
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
|
||||
|
@ -189,15 +189,20 @@ OPTIONS
|
|||
i.e., -F "" is not allowed.
|
||||
|
||||
The brstack output includes branch related information with raw addresses using the
|
||||
/v/v/v/v/ syntax in the following order:
|
||||
/v/v/v/v/cycles syntax in the following order:
|
||||
FROM: branch source instruction
|
||||
TO : branch target instruction
|
||||
M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
|
||||
X/- : X=branch inside a transactional region, -=not in transaction region or not supported
|
||||
A/- : A=TSX abort entry, -=not aborted region or not supported
|
||||
cycles
|
||||
|
||||
The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
|
||||
|
||||
When brstackinsn is specified the full assembler sequences of branch sequences for each sample
|
||||
is printed. This is the full execution path leading to the sample. This is only supported when the
|
||||
sample was recorded with perf record -b or -j any.
|
||||
|
||||
-k::
|
||||
--vmlinux=<file>::
|
||||
vmlinux pathname
|
||||
|
@ -248,6 +253,9 @@ OPTIONS
|
|||
--show-mmap-events
|
||||
Display mmap related events (e.g. MMAP, MMAP2).
|
||||
|
||||
--show-namespace-events
|
||||
Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
|
||||
|
||||
--show-switch-events
|
||||
Display context switch events i.e. events of type PERF_RECORD_SWITCH or
|
||||
PERF_RECORD_SWITCH_CPU_WIDE.
|
||||
|
@ -299,6 +307,10 @@ include::itrace.txt[]
|
|||
stop time is not given (i.e, time string is 'x.y,') then analysis goes
|
||||
to end of file.
|
||||
|
||||
--max-blocks::
|
||||
Set the maximum number of program blocks to print with brstackasm for
|
||||
each sample.
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-record[1], linkperf:perf-script-perl[1],
|
||||
|
|
|
@ -94,8 +94,7 @@ to activate system-wide monitoring. Default is to count on all CPUs.
|
|||
|
||||
-A::
|
||||
--no-aggr::
|
||||
Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
|
||||
This option is only valid in system-wide mode.
|
||||
Do not aggregate counts across all monitored CPUs.
|
||||
|
||||
-n::
|
||||
--null::
|
||||
|
@ -237,6 +236,9 @@ To interpret the results it is usually needed to know on which
|
|||
CPUs the workload runs on. If needed the CPUs can be forced using
|
||||
taskset.
|
||||
|
||||
--no-merge::
|
||||
Do not merge results from same PMUs.
|
||||
|
||||
EXAMPLES
|
||||
--------
|
||||
|
||||
|
|
|
@ -123,7 +123,8 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
|
|||
major or all pagefaults. Default value is maj.
|
||||
|
||||
--syscalls::
|
||||
Trace system calls. This options is enabled by default.
|
||||
Trace system calls. This options is enabled by default, disable with
|
||||
--no-syscalls.
|
||||
|
||||
--call-graph [mode,type,min[,limit],order[,key][,branch]]::
|
||||
Setup and enable call-graph (stack chain/backtrace) recording.
|
||||
|
|
|
@ -11,8 +11,8 @@ All fields are in native-endian of the machine that generated the perf.data.
|
|||
|
||||
When perf is writing to a pipe it uses a special version of the file
|
||||
format that does not rely on seeking to adjust data offsets. This
|
||||
format is not described here. The pipe version can be converted to
|
||||
normal perf.data with perf inject.
|
||||
format is described in "Pipe-mode data" section. The pipe data version can be
|
||||
augmented with additional events using perf inject.
|
||||
|
||||
The file starts with a perf_header:
|
||||
|
||||
|
@ -411,6 +411,21 @@ An array bound by the perf_file_section size.
|
|||
|
||||
ids points to a array of uint64_t defining the ids for event attr attr.
|
||||
|
||||
Pipe-mode data
|
||||
|
||||
Pipe-mode avoid seeks in the file by removing the perf_file_section and flags
|
||||
from the struct perf_header. The trimmed header is:
|
||||
|
||||
struct perf_pipe_file_header {
|
||||
u64 magic;
|
||||
u64 size;
|
||||
};
|
||||
|
||||
The information about attrs, data, and event_types is instead in the
|
||||
synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA and
|
||||
PERF_RECORD_HEADER_EVENT_TYPE that are generated by perf record in pipe-mode.
|
||||
|
||||
|
||||
References:
|
||||
|
||||
include/uapi/linux/perf_event.h
|
||||
|
|
|
@ -12,6 +12,7 @@ tools/arch/sparc/include/asm/barrier_32.h
|
|||
tools/arch/sparc/include/asm/barrier_64.h
|
||||
tools/arch/tile/include/asm/barrier.h
|
||||
tools/arch/x86/include/asm/barrier.h
|
||||
tools/arch/x86/include/asm/cmpxchg.h
|
||||
tools/arch/x86/include/asm/cpufeatures.h
|
||||
tools/arch/x86/include/asm/disabled-features.h
|
||||
tools/arch/x86/include/asm/required-features.h
|
||||
|
@ -63,6 +64,7 @@ tools/include/linux/bitops.h
|
|||
tools/include/linux/compiler.h
|
||||
tools/include/linux/compiler-gcc.h
|
||||
tools/include/linux/coresight-pmu.h
|
||||
tools/include/linux/bug.h
|
||||
tools/include/linux/filter.h
|
||||
tools/include/linux/hash.h
|
||||
tools/include/linux/kernel.h
|
||||
|
@ -72,12 +74,15 @@ tools/include/uapi/asm-generic/mman-common.h
|
|||
tools/include/uapi/asm-generic/mman.h
|
||||
tools/include/uapi/linux/bpf.h
|
||||
tools/include/uapi/linux/bpf_common.h
|
||||
tools/include/uapi/linux/fcntl.h
|
||||
tools/include/uapi/linux/hw_breakpoint.h
|
||||
tools/include/uapi/linux/mman.h
|
||||
tools/include/uapi/linux/perf_event.h
|
||||
tools/include/uapi/linux/stat.h
|
||||
tools/include/linux/poison.h
|
||||
tools/include/linux/rbtree.h
|
||||
tools/include/linux/rbtree_augmented.h
|
||||
tools/include/linux/refcount.h
|
||||
tools/include/linux/string.h
|
||||
tools/include/linux/stringify.h
|
||||
tools/include/linux/types.h
|
||||
|
|
|
@ -170,13 +170,20 @@ PYTHON2_CONFIG := \
|
|||
override PYTHON_CONFIG := \
|
||||
$(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
|
||||
|
||||
grep-libs = $(filter -l%,$(1))
|
||||
strip-libs = $(filter-out -l%,$(1))
|
||||
|
||||
PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
|
||||
|
||||
PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
|
||||
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
|
||||
|
||||
ifeq ($(CC), clang)
|
||||
PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
|
||||
ifdef PYTHON_CONFIG
|
||||
PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
|
||||
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
|
||||
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
|
||||
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
|
||||
ifeq ($(CC), clang)
|
||||
PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
|
||||
endif
|
||||
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
|
||||
endif
|
||||
|
||||
FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
|
||||
|
@ -267,6 +274,7 @@ ifdef NO_LIBELF
|
|||
NO_LIBUNWIND := 1
|
||||
NO_LIBDW_DWARF_UNWIND := 1
|
||||
NO_LIBBPF := 1
|
||||
NO_JVMTI := 1
|
||||
else
|
||||
ifeq ($(feature-libelf), 0)
|
||||
ifeq ($(feature-glibc), 1)
|
||||
|
@ -276,7 +284,7 @@ else
|
|||
LIBC_SUPPORT := 1
|
||||
endif
|
||||
ifeq ($(LIBC_SUPPORT),1)
|
||||
msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install libelf-dev, libelf-devel or elfutils-libelf-devel);
|
||||
msg := $(warning No libelf found. Disables 'probe' tool, jvmti and BPF support in 'perf record'. Please install libelf-dev, libelf-devel or elfutils-libelf-devel);
|
||||
|
||||
NO_LIBELF := 1
|
||||
NO_DWARF := 1
|
||||
|
@ -284,6 +292,7 @@ else
|
|||
NO_LIBUNWIND := 1
|
||||
NO_LIBDW_DWARF_UNWIND := 1
|
||||
NO_LIBBPF := 1
|
||||
NO_JVMTI := 1
|
||||
else
|
||||
ifneq ($(filter s% -static%,$(LDFLAGS),),)
|
||||
msg := $(error No static glibc found, please install glibc-static);
|
||||
|
@ -317,6 +326,10 @@ ifdef NO_DWARF
|
|||
NO_LIBDW_DWARF_UNWIND := 1
|
||||
endif
|
||||
|
||||
ifeq ($(feature-sched_getcpu), 1)
|
||||
CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT
|
||||
endif
|
||||
|
||||
ifndef NO_LIBELF
|
||||
CFLAGS += -DHAVE_LIBELF_SUPPORT
|
||||
EXTLIBS += -lelf
|
||||
|
@ -550,8 +563,6 @@ ifndef NO_GTK2
|
|||
endif
|
||||
endif
|
||||
|
||||
grep-libs = $(filter -l%,$(1))
|
||||
strip-libs = $(filter-out -l%,$(1))
|
||||
|
||||
ifdef NO_LIBPERL
|
||||
CFLAGS += -DNO_LIBPERL
|
||||
|
@ -599,21 +610,9 @@ else
|
|||
$(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev)
|
||||
else
|
||||
|
||||
PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
|
||||
|
||||
PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
|
||||
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
|
||||
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
|
||||
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
|
||||
ifeq ($(CC), clang)
|
||||
PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
|
||||
endif
|
||||
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
|
||||
|
||||
ifneq ($(feature-libpython), 1)
|
||||
$(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev)
|
||||
else
|
||||
|
||||
ifneq ($(feature-libpython-version), 1)
|
||||
$(warning Python 3 is not yet supported; please set)
|
||||
$(warning PYTHON and/or PYTHON_CONFIG appropriately.)
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "../../util/cs-etm.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#define ENABLE_SINK_MAX 128
|
||||
#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/"
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <linux/stringify.h>
|
||||
#include <dwarf-regs.h>
|
||||
|
||||
struct pt_regs_dwarfnum {
|
||||
|
@ -16,10 +17,9 @@ struct pt_regs_dwarfnum {
|
|||
unsigned int dwarfnum;
|
||||
};
|
||||
|
||||
#define STR(s) #s
|
||||
#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
|
||||
#define GPR_DWARFNUM_NAME(num) \
|
||||
{.name = STR(%r##num), .dwarfnum = num}
|
||||
{.name = __stringify(%r##num), .dwarfnum = num}
|
||||
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include <elfutils/libdwfl.h>
|
||||
#include "../../util/unwind-libdw.h"
|
||||
#include "../../util/perf_regs.h"
|
||||
#include "../../util/event.h"
|
||||
|
||||
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
|
||||
{
|
||||
|
|
|
@ -8,9 +8,12 @@
|
|||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <dwarf-regs.h>
|
||||
#include <linux/ptrace.h> /* for struct user_pt_regs */
|
||||
#include <linux/stringify.h>
|
||||
#include "util.h"
|
||||
|
||||
struct pt_regs_dwarfnum {
|
||||
|
@ -20,7 +23,7 @@ struct pt_regs_dwarfnum {
|
|||
|
||||
#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
|
||||
#define GPR_DWARFNUM_NAME(num) \
|
||||
{.name = STR(%x##num), .dwarfnum = num}
|
||||
{.name = __stringify(%x##num), .dwarfnum = num}
|
||||
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
|
||||
#define DWARFNUM2OFFSET(index) \
|
||||
(index * sizeof((struct user_pt_regs *)0)->regs[0])
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#include <errno.h>
|
||||
|
||||
#ifndef REMOTE_UNWIND_LIBUNWIND
|
||||
#include <errno.h>
|
||||
#include <libunwind.h>
|
||||
#include "perf_regs.h"
|
||||
#include "../../util/unwind.h"
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
#include "../util/util.h"
|
||||
#include "../util/debug.h"
|
||||
|
||||
#include "sane_ctype.h"
|
||||
|
||||
const char *const arm_triplets[] = {
|
||||
"arm-eabi-",
|
||||
"arm-linux-androideabi-",
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include <dwarf-regs.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/stringify.h>
|
||||
#include "util.h"
|
||||
|
||||
struct pt_regs_dwarfnum {
|
||||
|
@ -24,10 +25,10 @@ struct pt_regs_dwarfnum {
|
|||
};
|
||||
|
||||
#define REG_DWARFNUM_NAME(r, num) \
|
||||
{.name = STR(%)STR(r), .dwarfnum = num, \
|
||||
{.name = __stringify(%)__stringify(r), .dwarfnum = num, \
|
||||
.ptregs_offset = offsetof(struct pt_regs, r)}
|
||||
#define GPR_DWARFNUM_NAME(num) \
|
||||
{.name = STR(%gpr##num), .dwarfnum = num, \
|
||||
{.name = __stringify(%gpr##num), .dwarfnum = num, \
|
||||
.ptregs_offset = offsetof(struct pt_regs, gpr[num])}
|
||||
#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include <errno.h>
|
||||
#include "util/kvm-stat.h"
|
||||
#include "util/parse-events.h"
|
||||
#include "util/debug.h"
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "../../perf.h"
|
||||
#include "../../util/util.h"
|
||||
#include "../../util/perf_regs.h"
|
||||
#include "../../util/debug.h"
|
||||
|
||||
const struct sample_reg sample_reg_masks[] = {
|
||||
SMPL_REG(r0, PERF_REG_POWERPC_R0),
|
||||
|
@ -47,3 +53,109 @@ const struct sample_reg sample_reg_masks[] = {
|
|||
SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
|
||||
SMPL_REG_END
|
||||
};
|
||||
|
||||
/* REG or %rREG */
|
||||
#define SDT_OP_REGEX1 "^(%r)?([1-2]?[0-9]|3[0-1])$"
|
||||
|
||||
/* -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) */
|
||||
#define SDT_OP_REGEX2 "^(\\-)?([0-9]+)\\((%r)?([1-2]?[0-9]|3[0-1])\\)$"
|
||||
|
||||
static regex_t sdt_op_regex1, sdt_op_regex2;
|
||||
|
||||
static int sdt_init_op_regex(void)
|
||||
{
|
||||
static int initialized;
|
||||
int ret = 0;
|
||||
|
||||
if (initialized)
|
||||
return 0;
|
||||
|
||||
ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
||||
ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED);
|
||||
if (ret)
|
||||
goto free_regex1;
|
||||
|
||||
initialized = 1;
|
||||
return 0;
|
||||
|
||||
free_regex1:
|
||||
regfree(&sdt_op_regex1);
|
||||
error:
|
||||
pr_debug4("Regex compilation error.\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse OP and convert it into uprobe format, which is, +/-NUM(%gprREG).
|
||||
* Possible variants of OP are:
|
||||
* Format Example
|
||||
* -------------------------
|
||||
* NUM(REG) 48(18)
|
||||
* -NUM(REG) -48(18)
|
||||
* NUM(%rREG) 48(%r18)
|
||||
* -NUM(%rREG) -48(%r18)
|
||||
* REG 18
|
||||
* %rREG %r18
|
||||
* iNUM i0
|
||||
* i-NUM i-1
|
||||
*
|
||||
* SDT marker arguments on Powerpc uses %rREG form with -mregnames flag
|
||||
* and REG form with -mno-regnames. Here REG is general purpose register,
|
||||
* which is in 0 to 31 range.
|
||||
*/
|
||||
int arch_sdt_arg_parse_op(char *old_op, char **new_op)
|
||||
{
|
||||
int ret, new_len;
|
||||
regmatch_t rm[5];
|
||||
char prefix;
|
||||
|
||||
/* Constant argument. Uprobe does not support it */
|
||||
if (old_op[0] == 'i') {
|
||||
pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
|
||||
return SDT_ARG_SKIP;
|
||||
}
|
||||
|
||||
ret = sdt_init_op_regex();
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) {
|
||||
/* REG or %rREG --> %gprREG */
|
||||
|
||||
new_len = 5; /* % g p r NULL */
|
||||
new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
|
||||
|
||||
*new_op = zalloc(new_len);
|
||||
if (!*new_op)
|
||||
return -ENOMEM;
|
||||
|
||||
scnprintf(*new_op, new_len, "%%gpr%.*s",
|
||||
(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so);
|
||||
} else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) {
|
||||
/*
|
||||
* -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) -->
|
||||
* +/-NUM(%gprREG)
|
||||
*/
|
||||
prefix = (rm[1].rm_so == -1) ? '+' : '-';
|
||||
|
||||
new_len = 8; /* +/- ( % g p r ) NULL */
|
||||
new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
|
||||
new_len += (int)(rm[4].rm_eo - rm[4].rm_so);
|
||||
|
||||
*new_op = zalloc(new_len);
|
||||
if (!*new_op)
|
||||
return -ENOMEM;
|
||||
|
||||
scnprintf(*new_op, new_len, "%c%.*s(%%gpr%.*s)", prefix,
|
||||
(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
|
||||
(int)(rm[4].rm_eo - rm[4].rm_so), old_op + rm[4].rm_so);
|
||||
} else {
|
||||
pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
|
||||
return SDT_ARG_SKIP;
|
||||
}
|
||||
|
||||
return SDT_ARG_VALID;
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "symbol.h"
|
||||
#include "map.h"
|
||||
#include "probe-event.h"
|
||||
#include "probe-file.h"
|
||||
|
||||
#ifdef HAVE_LIBELF_SUPPORT
|
||||
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
|
||||
|
@ -79,13 +80,18 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
|
|||
* However, if the user specifies an offset, we fall back to using the
|
||||
* GEP since all userspace applications (objdump/readelf) show function
|
||||
* disassembly with offsets from the GEP.
|
||||
*
|
||||
* In addition, we shouldn't specify an offset for kretprobes.
|
||||
*/
|
||||
if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) ||
|
||||
!map || !sym)
|
||||
if (pev->point.offset || !map || !sym)
|
||||
return;
|
||||
|
||||
/* For kretprobes, add an offset only if the kernel supports it */
|
||||
if (!pev->uprobes && pev->point.retprobe) {
|
||||
#ifdef HAVE_LIBELF_SUPPORT
|
||||
if (!kretprobe_offset_is_supported())
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
|
||||
|
||||
if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
|
||||
{
|
||||
struct ins_ops *ops = NULL;
|
||||
|
||||
/* catch all kind of jumps */
|
||||
if (strchr(name, 'j') ||
|
||||
!strncmp(name, "bct", 3) ||
|
||||
!strncmp(name, "br", 2))
|
||||
ops = &jump_ops;
|
||||
/* override call/returns */
|
||||
if (!strcmp(name, "bras") ||
|
||||
!strcmp(name, "brasl") ||
|
||||
!strcmp(name, "basr"))
|
||||
ops = &call_ops;
|
||||
if (!strcmp(name, "br"))
|
||||
ops = &ret_ops;
|
||||
|
||||
arch__associate_ins_ops(arch, name, ops);
|
||||
return ops;
|
||||
}
|
||||
|
||||
static int s390__annotate_init(struct arch *arch)
|
||||
{
|
||||
if (!arch->initialized) {
|
||||
arch->initialized = true;
|
||||
arch->associate_instruction_ops = s390__associate_ins_ops;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -9,6 +9,7 @@
|
|||
* as published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include "../../util/kvm-stat.h"
|
||||
#include <asm/sie.h>
|
||||
|
||||
|
|
|
@ -338,6 +338,7 @@
|
|||
329 common pkey_mprotect sys_pkey_mprotect
|
||||
330 common pkey_alloc sys_pkey_alloc
|
||||
331 common pkey_free sys_pkey_free
|
||||
332 common statx sys_statx
|
||||
|
||||
#
|
||||
# x32-specific system call numbers start at 512 to avoid cache impact
|
||||
|
|
|
@ -6,7 +6,10 @@
|
|||
#include "evsel.h"
|
||||
#include "arch-tests.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
static pid_t spawn(void)
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <linux/types.h>
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "../../util/header.h"
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/bitops.h>
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include <errno.h>
|
||||
#include "../../util/kvm-stat.h"
|
||||
#include <asm/svm.h>
|
||||
#include <asm/vmx.h>
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "../../perf.h"
|
||||
#include "../../util/util.h"
|
||||
#include "../../util/perf_regs.h"
|
||||
#include "../../util/debug.h"
|
||||
|
||||
const struct sample_reg sample_reg_masks[] = {
|
||||
SMPL_REG(AX, PERF_REG_X86_AX),
|
||||
|
@ -26,3 +32,224 @@ const struct sample_reg sample_reg_masks[] = {
|
|||
#endif
|
||||
SMPL_REG_END
|
||||
};
|
||||
|
||||
struct sdt_name_reg {
|
||||
const char *sdt_name;
|
||||
const char *uprobe_name;
|
||||
};
|
||||
#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m}
|
||||
#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL}
|
||||
|
||||
static const struct sdt_name_reg sdt_reg_tbl[] = {
|
||||
SDT_NAME_REG(eax, ax),
|
||||
SDT_NAME_REG(rax, ax),
|
||||
SDT_NAME_REG(al, ax),
|
||||
SDT_NAME_REG(ah, ax),
|
||||
SDT_NAME_REG(ebx, bx),
|
||||
SDT_NAME_REG(rbx, bx),
|
||||
SDT_NAME_REG(bl, bx),
|
||||
SDT_NAME_REG(bh, bx),
|
||||
SDT_NAME_REG(ecx, cx),
|
||||
SDT_NAME_REG(rcx, cx),
|
||||
SDT_NAME_REG(cl, cx),
|
||||
SDT_NAME_REG(ch, cx),
|
||||
SDT_NAME_REG(edx, dx),
|
||||
SDT_NAME_REG(rdx, dx),
|
||||
SDT_NAME_REG(dl, dx),
|
||||
SDT_NAME_REG(dh, dx),
|
||||
SDT_NAME_REG(esi, si),
|
||||
SDT_NAME_REG(rsi, si),
|
||||
SDT_NAME_REG(sil, si),
|
||||
SDT_NAME_REG(edi, di),
|
||||
SDT_NAME_REG(rdi, di),
|
||||
SDT_NAME_REG(dil, di),
|
||||
SDT_NAME_REG(ebp, bp),
|
||||
SDT_NAME_REG(rbp, bp),
|
||||
SDT_NAME_REG(bpl, bp),
|
||||
SDT_NAME_REG(rsp, sp),
|
||||
SDT_NAME_REG(esp, sp),
|
||||
SDT_NAME_REG(spl, sp),
|
||||
|
||||
/* rNN registers */
|
||||
SDT_NAME_REG(r8b, r8),
|
||||
SDT_NAME_REG(r8w, r8),
|
||||
SDT_NAME_REG(r8d, r8),
|
||||
SDT_NAME_REG(r9b, r9),
|
||||
SDT_NAME_REG(r9w, r9),
|
||||
SDT_NAME_REG(r9d, r9),
|
||||
SDT_NAME_REG(r10b, r10),
|
||||
SDT_NAME_REG(r10w, r10),
|
||||
SDT_NAME_REG(r10d, r10),
|
||||
SDT_NAME_REG(r11b, r11),
|
||||
SDT_NAME_REG(r11w, r11),
|
||||
SDT_NAME_REG(r11d, r11),
|
||||
SDT_NAME_REG(r12b, r12),
|
||||
SDT_NAME_REG(r12w, r12),
|
||||
SDT_NAME_REG(r12d, r12),
|
||||
SDT_NAME_REG(r13b, r13),
|
||||
SDT_NAME_REG(r13w, r13),
|
||||
SDT_NAME_REG(r13d, r13),
|
||||
SDT_NAME_REG(r14b, r14),
|
||||
SDT_NAME_REG(r14w, r14),
|
||||
SDT_NAME_REG(r14d, r14),
|
||||
SDT_NAME_REG(r15b, r15),
|
||||
SDT_NAME_REG(r15w, r15),
|
||||
SDT_NAME_REG(r15d, r15),
|
||||
SDT_NAME_REG_END,
|
||||
};
|
||||
|
||||
/*
|
||||
* Perf only supports OP which is in +/-NUM(REG) form.
|
||||
* Here plus-minus sign, NUM and parenthesis are optional,
|
||||
* only REG is mandatory.
|
||||
*
|
||||
* SDT events also supports indirect addressing mode with a
|
||||
* symbol as offset, scaled mode and constants in OP. But
|
||||
* perf does not support them yet. Below are few examples.
|
||||
*
|
||||
* OP with scaled mode:
|
||||
* (%rax,%rsi,8)
|
||||
* 10(%ras,%rsi,8)
|
||||
*
|
||||
* OP with indirect addressing mode:
|
||||
* check_action(%rip)
|
||||
* mp_+52(%rip)
|
||||
* 44+mp_(%rip)
|
||||
*
|
||||
* OP with constant values:
|
||||
* $0
|
||||
* $123
|
||||
* $-1
|
||||
*/
|
||||
#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$"
|
||||
|
||||
static regex_t sdt_op_regex;
|
||||
|
||||
static int sdt_init_op_regex(void)
|
||||
{
|
||||
static int initialized;
|
||||
int ret = 0;
|
||||
|
||||
if (initialized)
|
||||
return 0;
|
||||
|
||||
ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED);
|
||||
if (ret < 0) {
|
||||
pr_debug4("Regex compilation error.\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
initialized = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Max x86 register name length is 5(ex: %r15d). So, 6th char
|
||||
* should always contain NULL. This helps to find register name
|
||||
* length using strlen, insted of maintaing one more variable.
|
||||
*/
|
||||
#define SDT_REG_NAME_SIZE 6
|
||||
|
||||
/*
|
||||
* The uprobe parser does not support all gas register names;
|
||||
* so, we have to replace them (ex. for x86_64: %rax -> %ax).
|
||||
* Note: If register does not require renaming, just copy
|
||||
* paste as it is, but don't leave it empty.
|
||||
*/
|
||||
static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) {
|
||||
if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) {
|
||||
strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
strncpy(uprobe_reg, sdt_reg, sdt_len);
|
||||
}
|
||||
|
||||
int arch_sdt_arg_parse_op(char *old_op, char **new_op)
|
||||
{
|
||||
char new_reg[SDT_REG_NAME_SIZE] = {0};
|
||||
int new_len = 0, ret;
|
||||
/*
|
||||
* rm[0]: +/-NUM(REG)
|
||||
* rm[1]: +/-
|
||||
* rm[2]: NUM
|
||||
* rm[3]: (
|
||||
* rm[4]: REG
|
||||
* rm[5]: )
|
||||
*/
|
||||
regmatch_t rm[6];
|
||||
/*
|
||||
* Max prefix length is 2 as it may contains sign(+/-)
|
||||
* and displacement 0 (Both sign and displacement 0 are
|
||||
* optional so it may be empty). Use one more character
|
||||
* to hold last NULL so that strlen can be used to find
|
||||
* prefix length, instead of maintaing one more variable.
|
||||
*/
|
||||
char prefix[3] = {0};
|
||||
|
||||
ret = sdt_init_op_regex();
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* If unsupported OR does not match with regex OR
|
||||
* register name too long, skip it.
|
||||
*/
|
||||
if (strchr(old_op, ',') || strchr(old_op, '$') ||
|
||||
regexec(&sdt_op_regex, old_op, 6, rm, 0) ||
|
||||
rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) {
|
||||
pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
|
||||
return SDT_ARG_SKIP;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare prefix.
|
||||
* If SDT OP has parenthesis but does not provide
|
||||
* displacement, add 0 for displacement.
|
||||
* SDT Uprobe Prefix
|
||||
* -----------------------------
|
||||
* +24(%rdi) +24(%di) +
|
||||
* 24(%rdi) +24(%di) +
|
||||
* %rdi %di
|
||||
* (%rdi) +0(%di) +0
|
||||
* -80(%rbx) -80(%bx) -
|
||||
*/
|
||||
if (rm[3].rm_so != rm[3].rm_eo) {
|
||||
if (rm[1].rm_so != rm[1].rm_eo)
|
||||
prefix[0] = *(old_op + rm[1].rm_so);
|
||||
else if (rm[2].rm_so != rm[2].rm_eo)
|
||||
prefix[0] = '+';
|
||||
else
|
||||
strncpy(prefix, "+0", 2);
|
||||
}
|
||||
|
||||
/* Rename register */
|
||||
sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so,
|
||||
new_reg);
|
||||
|
||||
/* Prepare final OP which should be valid for uprobe_events */
|
||||
new_len = strlen(prefix) +
|
||||
(rm[2].rm_eo - rm[2].rm_so) +
|
||||
(rm[3].rm_eo - rm[3].rm_so) +
|
||||
strlen(new_reg) +
|
||||
(rm[5].rm_eo - rm[5].rm_so) +
|
||||
1; /* NULL */
|
||||
|
||||
*new_op = zalloc(new_len);
|
||||
if (!*new_op)
|
||||
return -ENOMEM;
|
||||
|
||||
scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s",
|
||||
strlen(prefix), prefix,
|
||||
(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
|
||||
(int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so,
|
||||
strlen(new_reg), new_reg,
|
||||
(int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so);
|
||||
|
||||
return SDT_ARG_VALID;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include <elfutils/libdwfl.h>
|
||||
#include "../../util/unwind-libdw.h"
|
||||
#include "../../util/perf_regs.h"
|
||||
#include "../../util/event.h"
|
||||
|
||||
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
|
||||
{
|
||||
|
|
|
@ -25,17 +25,17 @@
|
|||
# endif
|
||||
#endif
|
||||
|
||||
int bench_numa(int argc, const char **argv, const char *prefix);
|
||||
int bench_sched_messaging(int argc, const char **argv, const char *prefix);
|
||||
int bench_sched_pipe(int argc, const char **argv, const char *prefix);
|
||||
int bench_mem_memcpy(int argc, const char **argv, const char *prefix);
|
||||
int bench_mem_memset(int argc, const char **argv, const char *prefix);
|
||||
int bench_futex_hash(int argc, const char **argv, const char *prefix);
|
||||
int bench_futex_wake(int argc, const char **argv, const char *prefix);
|
||||
int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix);
|
||||
int bench_futex_requeue(int argc, const char **argv, const char *prefix);
|
||||
int bench_numa(int argc, const char **argv);
|
||||
int bench_sched_messaging(int argc, const char **argv);
|
||||
int bench_sched_pipe(int argc, const char **argv);
|
||||
int bench_mem_memcpy(int argc, const char **argv);
|
||||
int bench_mem_memset(int argc, const char **argv);
|
||||
int bench_futex_hash(int argc, const char **argv);
|
||||
int bench_futex_wake(int argc, const char **argv);
|
||||
int bench_futex_wake_parallel(int argc, const char **argv);
|
||||
int bench_futex_requeue(int argc, const char **argv);
|
||||
/* pi futexes */
|
||||
int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
|
||||
int bench_futex_lock_pi(int argc, const char **argv);
|
||||
|
||||
#define BENCH_FORMAT_DEFAULT_STR "default"
|
||||
#define BENCH_FORMAT_DEFAULT 0
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue