Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "The main kernel side changes in this cycle were:

   - Various Intel-PT updates and optimizations (Alexander Shishkin)

   - Prohibit kprobes on Xen/KVM emulate prefixes (Masami Hiramatsu)

   - Add support for LSM and SELinux checks to control access to the
     perf syscall (Joel Fernandes)

   - Misc other changes, optimizations, fixes and cleanups - see the
     shortlog for details.

  There were numerous tooling changes as well - 254 non-merge commits.
  Here are the main changes - too many to list in detail:

   - Enhancements to core tooling infrastructure, perf.data, libperf,
     libtraceevent, event parsing, vendor events, Intel PT, callchains,
     BPF support and instruction decoding.

   - There were updates to the following tools:

        perf annotate
        perf diff
        perf inject
        perf kvm
        perf list
        perf maps
        perf parse
        perf probe
        perf record
        perf report
        perf script
        perf stat
        perf test
        perf trace

   - And a lot of other changes: please see the shortlog and Git log for
     more details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (279 commits)
  perf parse: Fix potential memory leak when handling tracepoint errors
  perf probe: Fix spelling mistake "addrees" -> "address"
  libtraceevent: Fix memory leakage in copy_filter_type
  libtraceevent: Fix header installation
  perf intel-bts: Does not support AUX area sampling
  perf intel-pt: Add support for decoding AUX area samples
  perf intel-pt: Add support for recording AUX area samples
  perf pmu: When using default config, record which bits of config were changed by the user
  perf auxtrace: Add support for queuing AUX area samples
  perf session: Add facility to peek at all events
  perf auxtrace: Add support for dumping AUX area samples
  perf inject: Cut AUX area samples
  perf record: Add aux-sample-size config term
  perf record: Add support for AUX area sampling
  perf auxtrace: Add support for AUX area sample recording
  perf auxtrace: Move perf_evsel__find_pmu()
  perf record: Add a function to test for kernel support for AUX area sampling
  perf tools: Add kernel AUX area sampling definitions
  perf/core: Make the mlock accounting simple again
  perf report: Jump to symbol source view from total cycles view
  ...
This commit is contained in:
Linus Torvalds 2019-11-26 15:04:47 -08:00
commit 3f59dbcace
297 changed files with 44924 additions and 35331 deletions

View File

@ -12846,6 +12846,13 @@ F: arch/*/events/*
F: arch/*/events/*/*
F: tools/perf/
PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS
R: John Garry <john.garry@huawei.com>
R: Will Deacon <will@kernel.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: tools/perf/pmu-events/arch/arm64/
PERSONALITY HANDLING
M: Christoph Hellwig <hch@infradead.org>
L: linux-abi-devel@lists.sourceforge.net

View File

@ -96,7 +96,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
return 0;
}
static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { }
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
return 0;
@ -127,7 +127,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
@ -179,7 +179,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
* pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the
* [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER.
*/
static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
{
unsigned long mmcra = regs->dsisr;
bool sdar_valid;
@ -204,8 +204,7 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
is_kernel_addr(mfspr(SPRN_SDAR)))
if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
*addrp = 0;
}
@ -444,7 +443,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
}
/* Processing BHRB entries */
static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
{
u64 val;
u64 addr;
@ -472,8 +471,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
* exporting it to userspace (avoid exposure of regions
* where we could have speculative execution)
*/
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
is_kernel_addr(addr))
if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
continue;
/* Branches are read most recent first (ie. mfbhrb 0 is
@ -2087,12 +2085,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type &
(PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
perf_get_data_addr(regs, &data.addr);
perf_get_data_addr(event, regs, &data.addr);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(cpuhw);
power_pmu_bhrb_read(event, cpuhw);
data.br_stack = &cpuhw->bhrb_stack;
}

View File

@ -652,15 +652,7 @@ static void amd_pmu_disable_event(struct perf_event *event)
*/
static int amd_pmu_handle_irq(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int active, handled;
/*
* Obtain the active count before calling x86_pmu_handle_irq() since
* it is possible that x86_pmu_handle_irq() may make a counter
* inactive (through x86_pmu_stop).
*/
active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
int handled;
/* Process any counter overflows */
handled = x86_pmu_handle_irq(regs);
@ -670,8 +662,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
* NMIs will be claimed if arriving within that window.
*/
if (handled) {
this_cpu_write(perf_nmi_tstamp,
jiffies + perf_nmi_window);
this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);
return handled;
}

View File

@ -2243,6 +2243,13 @@ static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
x86_pmu.sched_task(ctx, sched_in);
}
static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
{
if (x86_pmu.swap_task_ctx)
x86_pmu.swap_task_ctx(prev, next);
}
void perf_check_microcode(void)
{
if (x86_pmu.check_microcode)
@ -2297,6 +2304,7 @@ static struct pmu pmu = {
.event_idx = x86_pmu_event_idx,
.sched_task = x86_pmu_sched_task,
.task_ctx_size = sizeof(struct x86_perf_task_context),
.swap_task_ctx = x86_pmu_swap_task_ctx,
.check_period = x86_pmu_check_period,
.aux_output_match = x86_pmu_aux_output_match,

View File

@ -549,9 +549,11 @@ static int bts_event_init(struct perf_event *event)
* Note that the default paranoia setting permits unprivileged
* users to profile the kernel.
*/
if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
!capable(CAP_SYS_ADMIN))
return -EACCES;
if (event->attr.exclude_kernel) {
ret = perf_allow_kernel(&event->attr);
if (ret)
return ret;
}
if (x86_add_exclusive(x86_lbr_exclusive_bts))
return -EBUSY;

View File

@ -3315,8 +3315,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (x86_pmu.version < 3)
return -EINVAL;
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return -EACCES;
ret = perf_allow_cpu(&event->attr);
if (ret)
return ret;
event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
@ -3830,6 +3831,12 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx,
intel_pmu_lbr_sched_task(ctx, sched_in);
}
static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
{
intel_pmu_lbr_swap_task_ctx(prev, next);
}
static int intel_pmu_check_period(struct perf_event *event, u64 value)
{
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
@ -3965,6 +3972,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
.swap_task_ctx = intel_pmu_swap_task_ctx,
.check_period = intel_pmu_check_period,

View File

@ -417,6 +417,29 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
cpuc->last_log_id = ++task_ctx->log_id;
}
void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
{
struct x86_perf_task_context *prev_ctx_data, *next_ctx_data;
swap(prev->task_ctx_data, next->task_ctx_data);
/*
* Architecture specific synchronization makes sense in
* case both prev->task_ctx_data and next->task_ctx_data
* pointers are allocated.
*/
prev_ctx_data = next->task_ctx_data;
next_ctx_data = prev->task_ctx_data;
if (!prev_ctx_data || !next_ctx_data)
return;
swap(prev_ctx_data->lbr_callstack_users,
next_ctx_data->lbr_callstack_users);
}
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

View File

@ -776,8 +776,9 @@ static int p4_validate_raw_event(struct perf_event *event)
* the user needs special permissions to be able to use it
*/
if (p4_ht_active() && p4_event_bind_map[v].shared) {
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return -EACCES;
v = perf_allow_cpu(&event->attr);
if (v)
return v;
}
/* ESCR EventMask bits may be invalid */

View File

@ -397,6 +397,20 @@ static bool pt_event_valid(struct perf_event *event)
* These all are cpu affine and operate on a local PT
*/
static void pt_config_start(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 ctl = event->hw.config;
ctl |= RTIT_CTL_TRACEEN;
if (READ_ONCE(pt->vmx_on))
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
else
wrmsrl(MSR_IA32_RTIT_CTL, ctl);
WRITE_ONCE(event->hw.config, ctl);
}
/* Address ranges and their corresponding msr configuration registers */
static const struct pt_address_range {
unsigned long msr_a;
@ -469,6 +483,7 @@ static u64 pt_config_filters(struct perf_event *event)
static void pt_config(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf = perf_get_aux(&pt->handle);
u64 reg;
/* First round: clear STATUS, in particular the PSB byte counter. */
@ -478,7 +493,9 @@ static void pt_config(struct perf_event *event)
}
reg = pt_config_filters(event);
reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
reg |= RTIT_CTL_TRACEEN;
if (!buf->single)
reg |= RTIT_CTL_TOPA;
/*
* Previously, we had BRANCH_EN on by default, but now that PT has
@ -501,10 +518,7 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
event->hw.config = reg;
if (READ_ONCE(pt->vmx_on))
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
else
wrmsrl(MSR_IA32_RTIT_CTL, reg);
pt_config_start(event);
}
static void pt_config_stop(struct perf_event *event)
@ -533,18 +547,6 @@ static void pt_config_stop(struct perf_event *event)
wmb();
}
static void pt_config_buffer(void *buf, unsigned int topa_idx,
unsigned int output_off)
{
u64 reg;
wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}
/**
* struct topa - ToPA metadata
* @list: linkage to struct pt_buffer's list of tables
@ -602,6 +604,33 @@ static inline phys_addr_t topa_pfn(struct topa *topa)
#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size))
#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size)
static void pt_config_buffer(struct pt_buffer *buf)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 reg, mask;
void *base;
if (buf->single) {
base = buf->data_pages[0];
mask = (buf->nr_pages * PAGE_SIZE - 1) >> 7;
} else {
base = topa_to_page(buf->cur)->table;
mask = (u64)buf->cur_idx;
}
reg = virt_to_phys(base);
if (pt->output_base != reg) {
pt->output_base = reg;
wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
}
reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
if (pt->output_mask != reg) {
pt->output_mask = reg;
wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}
}
/**
* topa_alloc() - allocate page-sized ToPA table
* @cpu: CPU on which to allocate.
@ -802,6 +831,11 @@ static void pt_update_head(struct pt *pt)
struct pt_buffer *buf = perf_get_aux(&pt->handle);
u64 topa_idx, base, old;
if (buf->single) {
local_set(&buf->data_size, buf->output_off);
return;
}
/* offset of the first region in this table from the beginning of buf */
base = buf->cur->offset + buf->output_off;
@ -903,18 +937,21 @@ static void pt_handle_status(struct pt *pt)
*/
static void pt_read_offset(struct pt_buffer *buf)
{
u64 offset, base_topa;
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct topa_page *tp;
rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
tp = phys_to_virt(base_topa);
buf->cur = &tp->topa;
if (!buf->single) {
rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
tp = phys_to_virt(pt->output_base);
buf->cur = &tp->topa;
}
rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
/* offset within current output region */
buf->output_off = offset >> 32;
buf->output_off = pt->output_mask >> 32;
/* index of current output region within this table */
buf->cur_idx = (offset & 0xffffff80) >> 7;
if (!buf->single)
buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
}
static struct topa_entry *
@ -1030,6 +1067,9 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
unsigned long head = local64_read(&buf->head);
unsigned long idx, npages, wakeup;
if (buf->single)
return 0;
/* can't stop in the middle of an output region */
if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) {
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
@ -1111,13 +1151,17 @@ static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
if (buf->snapshot)
head &= (buf->nr_pages << PAGE_SHIFT) - 1;
pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
te = pt_topa_entry_for_page(buf, pg);
if (!buf->single) {
pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
te = pt_topa_entry_for_page(buf, pg);
cur_tp = topa_entry_to_page(te);
buf->cur = &cur_tp->topa;
buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
buf->output_off = head & (pt_buffer_region_size(buf) - 1);
cur_tp = topa_entry_to_page(te);
buf->cur = &cur_tp->topa;
buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
buf->output_off = head & (pt_buffer_region_size(buf) - 1);
} else {
buf->output_off = head;
}
local64_set(&buf->head, head);
local_set(&buf->data_size, 0);
@ -1131,6 +1175,9 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf)
{
struct topa *topa, *iter;
if (buf->single)
return;
list_for_each_entry_safe(topa, iter, &buf->tables, list) {
/*
* right now, this is in free_aux() path only, so
@ -1176,6 +1223,36 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
return 0;
}
static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
{
struct page *p = virt_to_page(buf->data_pages[0]);
int ret = -ENOTSUPP, order = 0;
/*
* We can use single range output mode
* + in snapshot mode, where we don't need interrupts;
* + if the hardware supports it;
* + if the entire buffer is one contiguous allocation.
*/
if (!buf->snapshot)
goto out;
if (!intel_pt_validate_hw_cap(PT_CAP_single_range_output))
goto out;
if (PagePrivate(p))
order = page_private(p);
if (1 << order != nr_pages)
goto out;
buf->single = true;
buf->nr_pages = nr_pages;
ret = 0;
out:
return ret;
}
/**
* pt_buffer_setup_aux() - set up topa tables for a PT buffer
* @cpu: Cpu on which to allocate, -1 means current.
@ -1198,6 +1275,13 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
if (!nr_pages)
return NULL;
/*
* Only support AUX sampling in snapshot mode, where we don't
* generate NMIs.
*/
if (event->attr.aux_sample_size && !snapshot)
return NULL;
if (cpu == -1)
cpu = raw_smp_processor_id();
node = cpu_to_node(cpu);
@ -1213,6 +1297,10 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
INIT_LIST_HEAD(&buf->tables);
ret = pt_buffer_try_single(buf, nr_pages);
if (!ret)
return buf;
ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL);
if (ret) {
kfree(buf);
@ -1379,9 +1467,8 @@ void intel_pt_interrupt(void)
return;
}
pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
buf->output_off);
pt_config(event);
pt_config_buffer(buf);
pt_config_start(event);
}
}
@ -1444,8 +1531,7 @@ static void pt_event_start(struct perf_event *event, int mode)
WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0;
pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
buf->output_off);
pt_config_buffer(buf);
pt_config(event);
return;
@ -1496,6 +1582,52 @@ static void pt_event_stop(struct perf_event *event, int mode)
}
}
static long pt_event_snapshot_aux(struct perf_event *event,
struct perf_output_handle *handle,
unsigned long size)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct pt_buffer *buf = perf_get_aux(&pt->handle);
unsigned long from = 0, to;
long ret;
if (WARN_ON_ONCE(!buf))
return 0;
/*
* Sampling is only allowed on snapshot events;
* see pt_buffer_setup_aux().
*/
if (WARN_ON_ONCE(!buf->snapshot))
return 0;
/*
* Here, handle_nmi tells us if the tracing is on
*/
if (READ_ONCE(pt->handle_nmi))
pt_config_stop(event);
pt_read_offset(buf);
pt_update_head(pt);
to = local_read(&buf->data_size);
if (to < size)
from = buf->nr_pages << PAGE_SHIFT;
from += to - size;
ret = perf_output_copy_aux(&pt->handle, handle, from, to);
/*
* If the tracing was on when we turned up, restart it.
* Compiler barrier not needed as we couldn't have been
* preempted by anything that touches pt->handle_nmi.
*/
if (pt->handle_nmi)
pt_config_start(event);
return ret;
}
static void pt_event_del(struct perf_event *event, int mode)
{
pt_event_stop(event, PERF_EF_UPDATE);
@ -1615,6 +1747,7 @@ static __init int pt_init(void)
pt_pmu.pmu.del = pt_event_del;
pt_pmu.pmu.start = pt_event_start;
pt_pmu.pmu.stop = pt_event_stop;
pt_pmu.pmu.snapshot_aux = pt_event_snapshot_aux;
pt_pmu.pmu.read = pt_event_read;
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;

View File

@ -64,6 +64,7 @@ struct pt_pmu {
* @lost: if data was lost/truncated
* @head: logical write offset inside the buffer
* @snapshot: if this is for a snapshot/overwrite counter
* @single: use Single Range Output instead of ToPA
* @stop_pos: STOP topa entry index
* @intr_pos: INT topa entry index
* @stop_te: STOP topa entry pointer
@ -80,6 +81,7 @@ struct pt_buffer {
local_t data_size;
local64_t head;
bool snapshot;
bool single;
long stop_pos, intr_pos;
struct topa_entry *stop_te, *intr_te;
void **data_pages;
@ -111,16 +113,20 @@ struct pt_filters {
/**
* struct pt - per-cpu pt context
* @handle: perf output handle
* @handle: perf output handle
* @filters: last configured filters
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
* @vmx_on: 1 if VMX is ON on this cpu
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
* @vmx_on: 1 if VMX is ON on this cpu
* @output_base: cached RTIT_OUTPUT_BASE MSR value
* @output_mask: cached RTIT_OUTPUT_MASK MSR value
*/
struct pt {
struct perf_output_handle handle;
struct pt_filters filters;
int handle_nmi;
int vmx_on;
u64 output_base;
u64 output_mask;
};
#endif /* __INTEL_PT_H__ */

View File

@ -682,6 +682,14 @@ struct x86_pmu {
*/
atomic_t lbr_exclusive[x86_lbr_exclusive_max];
/*
* perf task context (i.e. struct perf_event_context::task_ctx_data)
* switch helper to bridge calls from perf/core to perf/x86.
* See struct pmu::swap_task_ctx() usage for examples;
*/
void (*swap_task_ctx)(struct perf_event_context *prev,
struct perf_event_context *next);
/*
* AMD bits
*/
@ -1016,6 +1024,9 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr);
void intel_ds_init(void);
void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
u64 lbr_from_signext_quirk_wr(u64 val);

View File

@ -7,9 +7,11 @@
# define __ASM_FORM_RAW(x) x
# define __ASM_FORM_COMMA(x) x,
#else
# define __ASM_FORM(x) " " #x " "
# define __ASM_FORM_RAW(x) #x
# define __ASM_FORM_COMMA(x) " " #x ","
#include <linux/stringify.h>
# define __ASM_FORM(x) " " __stringify(x) " "
# define __ASM_FORM_RAW(x) __stringify(x)
# define __ASM_FORM_COMMA(x) " " __stringify(x) ","
#endif
#ifndef __x86_64__

View File

@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_EMULATE_PREFIX_H
#define _ASM_X86_EMULATE_PREFIX_H
/*
* Virt escape sequences to trigger instruction emulation;
* ideally these would decode to 'whole' instruction and not destroy
* the instruction stream; sadly this is not true for the 'kvm' one :/
*/
#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */
#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */
#endif

View File

@ -45,6 +45,7 @@ struct insn {
struct insn_field immediate2; /* for 64bit imm or seg16 */
};
int emulate_prefix_size;
insn_attr_t attr;
unsigned char opnd_bytes;
unsigned char addr_bytes;
@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn)
return (insn->vex_prefix.nbytes == 4);
}
static inline int insn_has_emulate_prefix(struct insn *insn)
{
return !!insn->emulate_prefix_size;
}
/* Ensure this instruction is decoded completely */
static inline int insn_complete(struct insn *insn)
{

View File

@ -379,12 +379,9 @@ struct xen_pmu_arch {
* Prefix forces emulation of some non-trapping instructions.
* Currently only CPUID.
*/
#ifdef __ASSEMBLY__
#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
#else
#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
#endif
#include <asm/emulate_prefix.h>
#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid)
#endif /* _ASM_X86_XEN_INTERFACE_H */

View File

@ -351,6 +351,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
kernel_insn_init(insn, dest, MAX_INSN_SIZE);
insn_get_length(insn);
/* We can not probe force emulate prefixed instruction */
if (insn_has_emulate_prefix(insn))
return 0;
/* Another subsystem puts a breakpoint, failed to recover */
if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
return 0;

View File

@ -68,6 +68,7 @@
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/intel_pt.h>
#include <asm/emulate_prefix.h>
#include <clocksource/hyperv_timer.h>
#define CREATE_TRACE_POINTS
@ -5492,6 +5493,7 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
int handle_ud(struct kvm_vcpu *vcpu)
{
static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
int emul_type = EMULTYPE_TRAP_UD;
char sig[5]; /* ud2; .ascii "kvm" */
struct x86_exception e;
@ -5499,7 +5501,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
if (force_emulation_prefix &&
kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
sig, sizeof(sig), &e) == 0 &&
memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
emul_type = EMULTYPE_TRAP_UD_FORCED;
}

View File

@ -13,6 +13,8 @@
#include <asm/inat.h>
#include <asm/insn.h>
#include <asm/emulate_prefix.h>
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
@ -58,6 +60,36 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
insn->addr_bytes = 4;
}
static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX };
static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX };
static int __insn_get_emulate_prefix(struct insn *insn,
const insn_byte_t *prefix, size_t len)
{
size_t i;
for (i = 0; i < len; i++) {
if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i])
goto err_out;
}
insn->emulate_prefix_size = len;
insn->next_byte += len;
return 1;
err_out:
return 0;
}
static void insn_get_emulate_prefix(struct insn *insn)
{
if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix)))
return;
__insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix));
}
/**
* insn_get_prefixes - scan x86 instruction prefix bytes
* @insn: &struct insn containing instruction
@ -76,6 +108,8 @@ void insn_get_prefixes(struct insn *insn)
if (prefixes->got)
return;
insn_get_emulate_prefix(insn);
nb = 0;
lb = 0;
b = peek_next(insn_byte_t, insn);

View File

@ -333,7 +333,7 @@ AVXcode: 1
06: CLTS
07: SYSRET (o64)
08: INVD
09: WBINVD
09: WBINVD | WBNOINVD (F3)
0a:
0b: UD2 (1B)
0c:
@ -364,7 +364,7 @@ AVXcode: 1
# a ModR/M byte.
1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
1c:
1c: Grp20 (1A),(1C)
1d:
1e:
1f: NOP Ev
@ -792,6 +792,8 @@ f3: Grp17 (1A)
f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
f9: MOVDIRI My,Gy
EndTable
Table: 3-byte opcode 2 (0x0f 0x3a)
@ -943,9 +945,9 @@ GrpTable: Grp6
EndTable
GrpTable: Grp7
0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B)
1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B)
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
5: rdpkru (110),(11B) | wrpkru (111),(11B)
@ -1020,7 +1022,7 @@ GrpTable: Grp15
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE | ptwrite Ey (F3),(11B)
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
7: clflush | clflushopt (66) | sfence (11B)
EndTable
@ -1051,6 +1053,10 @@ GrpTable: Grp19
6: vscatterpf1qps/d Wx (66),(ev)
EndTable
GrpTable: Grp20
0: cldemote Mb
EndTable
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH

View File

@ -1818,6 +1818,14 @@ union security_list_options {
void (*bpf_prog_free_security)(struct bpf_prog_aux *aux);
#endif /* CONFIG_BPF_SYSCALL */
int (*locked_down)(enum lockdown_reason what);
#ifdef CONFIG_PERF_EVENTS
int (*perf_event_open)(struct perf_event_attr *attr, int type);
int (*perf_event_alloc)(struct perf_event *event);
void (*perf_event_free)(struct perf_event *event);
int (*perf_event_read)(struct perf_event *event);
int (*perf_event_write)(struct perf_event *event);
#endif
};
struct security_hook_heads {
@ -2060,6 +2068,13 @@ struct security_hook_heads {
struct hlist_head bpf_prog_free_security;
#endif /* CONFIG_BPF_SYSCALL */
struct hlist_head locked_down;
#ifdef CONFIG_PERF_EVENTS
struct hlist_head perf_event_open;
struct hlist_head perf_event_alloc;
struct hlist_head perf_event_free;
struct hlist_head perf_event_read;
struct hlist_head perf_event_write;
#endif
} __randomize_layout;
/*

View File

@ -56,6 +56,7 @@ struct perf_guest_info_callbacks {
#include <linux/perf_regs.h>
#include <linux/cgroup.h>
#include <linux/refcount.h>
#include <linux/security.h>
#include <asm/local.h>
struct perf_callchain_entry {
@ -248,6 +249,8 @@ struct perf_event;
#define PERF_PMU_CAP_NO_EXCLUDE 0x80
#define PERF_PMU_CAP_AUX_OUTPUT 0x100
struct perf_output_handle;
/**
* struct pmu - generic performance monitoring unit
*/
@ -409,6 +412,15 @@ struct pmu {
*/
size_t task_ctx_size;
/*
* PMU specific parts of task perf event context (i.e. ctx->task_ctx_data)
* can be synchronized using this function. See Intel LBR callstack support
* implementation and Perf core context switch handling callbacks for usage
* examples.
*/
void (*swap_task_ctx) (struct perf_event_context *prev,
struct perf_event_context *next);
/* optional */
/*
* Set up pmu-private data structures for an AUX area
@ -422,6 +434,19 @@ struct pmu {
*/
void (*free_aux) (void *aux); /* optional */
/*
* Take a snapshot of the AUX buffer without touching the event
* state, so that preempting ->start()/->stop() callbacks does
* not interfere with their logic. Called in PMI context.
*
* Returns the size of AUX data copied to the output handle.
*
* Optional.
*/
long (*snapshot_aux) (struct perf_event *event,
struct perf_output_handle *handle,
unsigned long size);
/*
* Validate address range filters: make sure the HW supports the
* requested configuration and number of filters; return 0 if the
@ -721,6 +746,9 @@ struct perf_event {
struct perf_cgroup *cgrp; /* cgroup event is attach to */
#endif
#ifdef CONFIG_SECURITY
void *security;
#endif
struct list_head sb_list;
#endif /* CONFIG_PERF_EVENTS */
};
@ -960,6 +988,7 @@ struct perf_sample_data {
u32 reserved;
} cpu_entry;
struct perf_callchain_entry *callchain;
u64 aux_size;
/*
* regs_user may point to task_pt_regs or to regs_user_copy, depending
@ -1241,19 +1270,41 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
int perf_event_max_stack_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
static inline bool perf_paranoid_tracepoint_raw(void)
/* Access to perf_event_open(2) syscall. */
#define PERF_SECURITY_OPEN 0
/* Finer grained perf_event_open(2) access control. */
#define PERF_SECURITY_CPU 1
#define PERF_SECURITY_KERNEL 2
#define PERF_SECURITY_TRACEPOINT 3
static inline int perf_is_paranoid(void)
{
return sysctl_perf_event_paranoid > -1;
}
static inline bool perf_paranoid_cpu(void)
static inline int perf_allow_kernel(struct perf_event_attr *attr)
{
return sysctl_perf_event_paranoid > 0;
if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN))
return -EACCES;
return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
}
static inline bool perf_paranoid_kernel(void)
static inline int perf_allow_cpu(struct perf_event_attr *attr)
{
return sysctl_perf_event_paranoid > 1;
if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN))
return -EACCES;
return security_perf_event_open(attr, PERF_SECURITY_CPU);
}
static inline int perf_allow_tracepoint(struct perf_event_attr *attr)
{
if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN))
return -EPERM;
return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT);
}
extern void perf_event_init(void);
@ -1327,6 +1378,9 @@ extern unsigned int perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len);
extern unsigned int perf_output_skip(struct perf_output_handle *handle,
unsigned int len);
extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
struct perf_output_handle *handle,
unsigned long from, unsigned long to);
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
extern u64 perf_swevent_set_period(struct perf_event *event);

View File

@ -1895,5 +1895,42 @@ static inline void security_bpf_prog_free(struct bpf_prog_aux *aux)
#endif /* CONFIG_SECURITY */
#endif /* CONFIG_BPF_SYSCALL */
#endif /* ! __LINUX_SECURITY_H */
#ifdef CONFIG_PERF_EVENTS
struct perf_event_attr;
struct perf_event;
#ifdef CONFIG_SECURITY
extern int security_perf_event_open(struct perf_event_attr *attr, int type);
extern int security_perf_event_alloc(struct perf_event *event);
extern void security_perf_event_free(struct perf_event *event);
extern int security_perf_event_read(struct perf_event *event);
extern int security_perf_event_write(struct perf_event *event);
#else
static inline int security_perf_event_open(struct perf_event_attr *attr,
int type)
{
return 0;
}
static inline int security_perf_event_alloc(struct perf_event *event)
{
return 0;
}
static inline void security_perf_event_free(struct perf_event *event)
{
}
static inline int security_perf_event_read(struct perf_event *event)
{
return 0;
}
static inline int security_perf_event_write(struct perf_event *event)
{
return 0;
}
#endif /* CONFIG_SECURITY */
#endif /* CONFIG_PERF_EVENTS */
#endif /* ! __LINUX_SECURITY_H */

View File

@ -141,8 +141,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_AUX = 1U << 20,
PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
@ -300,6 +301,7 @@ enum perf_event_read_format {
/* add: sample_stack_user */
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
/*
* Hardware event_id to monitor via a performance monitoring event:
@ -424,7 +426,9 @@ struct perf_event_attr {
*/
__u32 aux_watermark;
__u16 sample_max_stack;
__u16 __reserved_2; /* align to __u64 */
__u16 __reserved_2;
__u32 aux_sample_size;
__u32 __reserved_3;
};
/*
@ -864,6 +868,8 @@ enum perf_event_type {
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
* { u64 size;
* char data[size]; } && PERF_SAMPLE_AUX
* };
*/
PERF_RECORD_SAMPLE = 9,

View File

@ -1941,6 +1941,11 @@ static void perf_put_aux_event(struct perf_event *event)
}
}
static bool perf_need_aux_event(struct perf_event *event)
{
return !!event->attr.aux_output || !!event->attr.aux_sample_size;
}
static int perf_get_aux_event(struct perf_event *event,
struct perf_event *group_leader)
{
@ -1953,7 +1958,17 @@ static int perf_get_aux_event(struct perf_event *event,
if (!group_leader)
return 0;
if (!perf_aux_output_match(event, group_leader))
/*
* aux_output and aux_sample_size are mutually exclusive.
*/
if (event->attr.aux_output && event->attr.aux_sample_size)
return 0;
if (event->attr.aux_output &&
!perf_aux_output_match(event, group_leader))
return 0;
if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
return 0;
if (!atomic_long_inc_not_zero(&group_leader->refcount))
@ -2666,6 +2681,25 @@ perf_install_in_context(struct perf_event_context *ctx,
*/
smp_store_release(&event->ctx, ctx);
/*
* perf_event_attr::disabled events will not run and can be initialized
* without IPI. Except when this is the first event for the context, in
* that case we need the magic of the IPI to set ctx->is_active.
*
* The IOC_ENABLE that is sure to follow the creation of a disabled
* event will issue the IPI and reprogram the hardware.
*/
if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
raw_spin_lock_irq(&ctx->lock);
if (ctx->task == TASK_TOMBSTONE) {
raw_spin_unlock_irq(&ctx->lock);
return;
}
add_event_to_ctx(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
return;
}
if (!task) {
cpu_function_call(cpu, __perf_install_in_context, event);
return;
@ -3204,10 +3238,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
raw_spin_lock(&ctx->lock);
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) {
struct pmu *pmu = ctx->pmu;
WRITE_ONCE(ctx->task, next);
WRITE_ONCE(next_ctx->task, task);
swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
/*
* PMU specific parts of task perf context can require
* additional synchronization. As an example of such
* synchronization see implementation details of Intel
* LBR call stack data profiling;
*/
if (pmu->swap_task_ctx)
pmu->swap_task_ctx(ctx, next_ctx);
else
swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
/*
* RCU_INIT_POINTER here is safe because we've not
@ -4229,8 +4274,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
if (!task) {
/* Must be root to operate on a CPU event: */
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return ERR_PTR(-EACCES);
err = perf_allow_cpu(&event->attr);
if (err)
return ERR_PTR(err);
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
@ -4539,6 +4585,8 @@ static void _free_event(struct perf_event *event)
unaccount_event(event);
security_perf_event_free(event);
if (event->rb) {
/*
* Can happen when we close an event with re-directed output.
@ -4992,6 +5040,10 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
struct perf_event_context *ctx;
int ret;
ret = security_perf_event_read(event);
if (ret)
return ret;
ctx = perf_event_ctx_lock(event);
ret = __perf_read(event, buf, count);
perf_event_ctx_unlock(event, ctx);
@ -5288,6 +5340,11 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct perf_event_context *ctx;
long ret;
/* Treat ioctl like writes as it is likely a mutating operation. */
ret = security_perf_event_write(event);
if (ret)
return ret;
ctx = perf_event_ctx_lock(event);
ret = _perf_ioctl(event, cmd, arg);
perf_event_ctx_unlock(event, ctx);
@ -5639,10 +5696,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
perf_pmu_output_stop(event);
/* now it's safe to free the pages */
if (!rb->aux_mmap_locked)
atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
else
atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
atomic_long_sub(rb->aux_nr_pages - rb->aux_mmap_locked, &mmap_user->locked_vm);
atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
/* this has to be the last one */
rb_free_aux(rb);
@ -5753,6 +5808,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
ret = security_perf_event_read(event);
if (ret)
return ret;
vma_size = vma->vm_end - vma->vm_start;
if (vma->vm_pgoff == 0) {
@ -5859,13 +5918,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
user_locked = atomic_long_read(&user->locked_vm) + user_extra;
if (user_locked <= user_lock_limit) {
/* charge all to locked_vm */
} else if (atomic_long_read(&user->locked_vm) >= user_lock_limit) {
/* charge all to pinned_vm */
extra = user_extra;
user_extra = 0;
} else {
if (user_locked > user_lock_limit) {
/*
* charge locked_vm until it hits user_lock_limit;
* charge the rest from pinned_vm
@ -5878,7 +5931,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
lock_limit >>= PAGE_SHIFT;
locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
if ((locked > lock_limit) && perf_is_paranoid() &&
!capable(CAP_IPC_LOCK)) {
ret = -EPERM;
goto unlock;
@ -6208,6 +6261,122 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
}
}
static unsigned long perf_prepare_sample_aux(struct perf_event *event,
struct perf_sample_data *data,
size_t size)
{
struct perf_event *sampler = event->aux_event;
struct ring_buffer *rb;
data->aux_size = 0;
if (!sampler)
goto out;
if (WARN_ON_ONCE(READ_ONCE(sampler->state) != PERF_EVENT_STATE_ACTIVE))
goto out;
if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
goto out;
rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
if (!rb)
goto out;
/*
* If this is an NMI hit inside sampling code, don't take
* the sample. See also perf_aux_sample_output().
*/
if (READ_ONCE(rb->aux_in_sampling)) {
data->aux_size = 0;
} else {
size = min_t(size_t, size, perf_aux_size(rb));
data->aux_size = ALIGN(size, sizeof(u64));
}
ring_buffer_put(rb);
out:
return data->aux_size;
}
long perf_pmu_snapshot_aux(struct ring_buffer *rb,
struct perf_event *event,
struct perf_output_handle *handle,
unsigned long size)
{
unsigned long flags;
long ret;
/*
* Normal ->start()/->stop() callbacks run in IRQ mode in scheduler
* paths. If we start calling them in NMI context, they may race with
* the IRQ ones, that is, for example, re-starting an event that's just
* been stopped, which is why we're using a separate callback that
* doesn't change the event state.
*
* IRQs need to be disabled to prevent IPIs from racing with us.
*/
local_irq_save(flags);
/*
* Guard against NMI hits inside the critical section;
* see also perf_prepare_sample_aux().
*/
WRITE_ONCE(rb->aux_in_sampling, 1);
barrier();
ret = event->pmu->snapshot_aux(event, handle, size);
barrier();
WRITE_ONCE(rb->aux_in_sampling, 0);
local_irq_restore(flags);
return ret;
}
static void perf_aux_sample_output(struct perf_event *event,
struct perf_output_handle *handle,
struct perf_sample_data *data)
{
struct perf_event *sampler = event->aux_event;
unsigned long pad;
struct ring_buffer *rb;
long size;
if (WARN_ON_ONCE(!sampler || !data->aux_size))
return;
rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
if (!rb)
return;
size = perf_pmu_snapshot_aux(rb, sampler, handle, data->aux_size);
/*
* An error here means that perf_output_copy() failed (returned a
* non-zero surplus that it didn't copy), which in its current
* enlightened implementation is not possible. If that changes, we'd
* like to know.
*/
if (WARN_ON_ONCE(size < 0))
goto out_put;
/*
* The pad comes from ALIGN()ing data->aux_size up to u64 in
* perf_prepare_sample_aux(), so should not be more than that.
*/
pad = data->aux_size - size;
if (WARN_ON_ONCE(pad >= sizeof(u64)))
pad = 8;
if (pad) {
u64 zero = 0;
perf_output_copy(handle, &zero, pad);
}
out_put:
ring_buffer_put(rb);
}
static void __perf_event_header__init_id(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event)
@ -6527,6 +6696,13 @@ void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
perf_output_put(handle, data->phys_addr);
if (sample_type & PERF_SAMPLE_AUX) {
perf_output_put(handle, data->aux_size);
if (data->aux_size)
perf_aux_sample_output(event, handle, data);
}
if (!event->attr.watermark) {
int wakeup_events = event->attr.wakeup_events;
@ -6715,6 +6891,35 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
data->phys_addr = perf_virt_to_phys(data->addr);
if (sample_type & PERF_SAMPLE_AUX) {
u64 size;
header->size += sizeof(u64); /* size */
/*
* Given the 16bit nature of header::size, an AUX sample can
* easily overflow it, what with all the preceding sample bits.
* Make sure this doesn't happen by using up to U16_MAX bytes
* per sample in total (rounded down to 8 byte boundary).
*/
size = min_t(size_t, U16_MAX - header->size,
event->attr.aux_sample_size);
size = rounddown(size, 8);
size = perf_prepare_sample_aux(event, data, size);
WARN_ON_ONCE(size + header->size > U16_MAX);
header->size += size;
}
/*
* If you're adding more sample types here, you likely need to do
* something about the overflowing header::size, like repurpose the
* lowest 3 bits of size, which should be always zero at the moment.
* This raises a more important question, do we really need 512k sized
* samples and why, so good argumentation is in order for whatever you
* do here next.
*/
WARN_ON_ONCE(header->size & 7);
}
static __always_inline int
@ -10066,7 +10271,7 @@ static struct lock_class_key cpuctx_lock;
int perf_pmu_register(struct pmu *pmu, const char *name, int type)
{
int cpu, ret;
int cpu, ret, max = PERF_TYPE_MAX;
mutex_lock(&pmus_lock);
ret = -ENOMEM;
@ -10079,12 +10284,17 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
goto skip_type;
pmu->name = name;
if (type < 0) {
type = idr_alloc(&pmu_idr, pmu, PERF_TYPE_MAX, 0, GFP_KERNEL);
if (type < 0) {
ret = type;
if (type != PERF_TYPE_SOFTWARE) {
if (type >= 0)
max = type;
ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
if (ret < 0)
goto free_pdc;
}
WARN_ON(type >= 0 && ret != type);
type = ret;
}
pmu->type = type;
@ -10161,7 +10371,16 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
if (!pmu->event_idx)
pmu->event_idx = perf_event_idx_default;
list_add_rcu(&pmu->entry, &pmus);
/*
* Ensure the TYPE_SOFTWARE PMUs are at the head of the list,
* since these cannot be in the IDR. This way the linear search
* is fast, provided a valid software event is provided.
*/
if (type == PERF_TYPE_SOFTWARE || !name)
list_add_rcu(&pmu->entry, &pmus);
else
list_add_tail_rcu(&pmu->entry, &pmus);
atomic_set(&pmu->exclusive_cnt, 0);
ret = 0;
unlock:
@ -10174,7 +10393,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
put_device(pmu->dev);
free_idr:
if (pmu->type >= PERF_TYPE_MAX)
if (pmu->type != PERF_TYPE_SOFTWARE)
idr_remove(&pmu_idr, pmu->type);
free_pdc:
@ -10196,7 +10415,7 @@ void perf_pmu_unregister(struct pmu *pmu)
synchronize_rcu();
free_percpu(pmu->pmu_disable_count);
if (pmu->type >= PERF_TYPE_MAX)
if (pmu->type != PERF_TYPE_SOFTWARE)
idr_remove(&pmu_idr, pmu->type);
if (pmu_bus_running) {
if (pmu->nr_addr_filters)
@ -10266,9 +10485,8 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
static struct pmu *perf_init_event(struct perf_event *event)
{
int idx, type, ret;
struct pmu *pmu;
int idx;
int ret;
idx = srcu_read_lock(&pmus_srcu);
@ -10280,13 +10498,28 @@ static struct pmu *perf_init_event(struct perf_event *event)
goto unlock;
}
/*
* PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
* are often aliases for PERF_TYPE_RAW.
*/
type = event->attr.type;
if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)
type = PERF_TYPE_RAW;
again:
rcu_read_lock();
pmu = idr_find(&pmu_idr, event->attr.type);
pmu = idr_find(&pmu_idr, type);
rcu_read_unlock();
if (pmu) {
ret = perf_try_init_event(pmu, event);
if (ret == -ENOENT && event->attr.type != type) {
type = event->attr.type;
goto again;
}
if (ret)
pmu = ERR_PTR(ret);
goto unlock;
}
@ -10618,11 +10851,20 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
}
}
err = security_perf_event_alloc(event);
if (err)
goto err_callchain_buffer;
/* symmetric to unaccount_event() in _free_event() */
account_event(event);
return event;
err_callchain_buffer:
if (!event->parent) {
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
put_callchain_buffers();
}
err_addr_filters:
kfree(event->addr_filter_ranges);
@ -10673,7 +10915,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
attr->size = size;
if (attr->__reserved_1 || attr->__reserved_2)
if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
return -EINVAL;
if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@ -10711,9 +10953,11 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
attr->branch_sample_type = mask;
}
/* privileged levels capture (kernel, hv): check permissions */
if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
&& perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
return -EACCES;
if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) {
ret = perf_allow_kernel(attr);
if (ret)
return ret;
}
}
if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
@ -10926,13 +11170,19 @@ SYSCALL_DEFINE5(perf_event_open,
if (flags & ~PERF_FLAG_ALL)
return -EINVAL;
/* Do we allow access to perf_event_open(2) ? */
err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
if (err)
return err;
err = perf_copy_attr(attr_uptr, &attr);
if (err)
return err;
if (!attr.exclude_kernel) {
if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
return -EACCES;
err = perf_allow_kernel(&attr);
if (err)
return err;
}
if (attr.namespaces) {
@ -10949,9 +11199,11 @@ SYSCALL_DEFINE5(perf_event_open,
}
/* Only privileged users can get physical addresses */
if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
return -EACCES;
if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) {
err = perf_allow_kernel(&attr);
if (err)
return err;
}
err = security_locked_down(LOCKDOWN_PERF);
if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR))
@ -11213,7 +11465,7 @@ SYSCALL_DEFINE5(perf_event_open,
}
}
if (event->attr.aux_output && !perf_get_aux_event(event, group_leader))
if (perf_need_aux_event(event) && !perf_get_aux_event(event, group_leader))
goto err_locked;
/*

View File

@ -50,6 +50,7 @@ struct ring_buffer {
unsigned long aux_mmap_locked;
void (*free_aux)(void *);
refcount_t aux_refcount;
int aux_in_sampling;
void **aux_pages;
void *aux_priv;

View File

@ -562,6 +562,42 @@ void *perf_get_aux(struct perf_output_handle *handle)
}
EXPORT_SYMBOL_GPL(perf_get_aux);
/*
* Copy out AUX data from an AUX handle.
*/
long perf_output_copy_aux(struct perf_output_handle *aux_handle,
struct perf_output_handle *handle,
unsigned long from, unsigned long to)
{
unsigned long tocopy, remainder, len = 0;
struct ring_buffer *rb = aux_handle->rb;
void *addr;
from &= (rb->aux_nr_pages << PAGE_SHIFT) - 1;
to &= (rb->aux_nr_pages << PAGE_SHIFT) - 1;
do {
tocopy = PAGE_SIZE - offset_in_page(from);
if (to > from)
tocopy = min(tocopy, to - from);
if (!tocopy)
break;
addr = rb->aux_pages[from >> PAGE_SHIFT];
addr += offset_in_page(from);
remainder = perf_output_copy(handle, addr, tocopy);
if (remainder)
return -EFAULT;
len += tocopy;
from += tocopy;
from &= (rb->aux_nr_pages << PAGE_SHIFT) - 1;
} while (to != from);
return len;
}
#define PERF_AUX_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
static struct page *rb_alloc_aux_page(int node, int order)
@ -754,6 +790,14 @@ static void *perf_mmap_alloc_page(int cpu)
return page_address(page);
}
static void perf_mmap_free_page(void *addr)
{
struct page *page = virt_to_page(addr);
page->mapping = NULL;
__free_page(page);
}
struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
{
struct ring_buffer *rb;
@ -788,9 +832,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
fail_data_pages:
for (i--; i >= 0; i--)
free_page((unsigned long)rb->data_pages[i]);
perf_mmap_free_page(rb->data_pages[i]);
free_page((unsigned long)rb->user_page);
perf_mmap_free_page(rb->user_page);
fail_user_page:
kfree(rb);
@ -799,21 +843,13 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
return NULL;
}
static void perf_mmap_free_page(unsigned long addr)
{
struct page *page = virt_to_page((void *)addr);
page->mapping = NULL;
__free_page(page);
}
void rb_free(struct ring_buffer *rb)
{
int i;
perf_mmap_free_page((unsigned long)rb->user_page);
perf_mmap_free_page(rb->user_page);
for (i = 0; i < rb->nr_pages; i++)
perf_mmap_free_page((unsigned long)rb->data_pages[i]);
perf_mmap_free_page(rb->data_pages[i]);
kfree(rb);
}

View File

@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/security.h>
#include "trace.h"
#include "trace_probe.h"
@ -26,8 +27,10 @@ static int total_ref_count;
static int perf_trace_event_perm(struct trace_event_call *tp_event,
struct perf_event *p_event)
{
int ret;
if (tp_event->perf_perm) {
int ret = tp_event->perf_perm(tp_event, p_event);
ret = tp_event->perf_perm(tp_event, p_event);
if (ret)
return ret;
}
@ -46,8 +49,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event,
/* The ftrace function trace is allowed only for root. */
if (ftrace_event_is_function(tp_event)) {
if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
return -EPERM;
ret = perf_allow_tracepoint(&p_event->attr);
if (ret)
return ret;
if (!is_sampling_event(p_event))
return 0;
@ -82,8 +86,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event,
* ...otherwise raw tracepoint data can be a severe data leak,
* only allow root to have these.
*/
if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
return -EPERM;
ret = perf_allow_tracepoint(&p_event->attr);
if (ret)
return ret;
return 0;
}

View File

@ -2404,3 +2404,30 @@ int security_locked_down(enum lockdown_reason what)
return call_int_hook(locked_down, 0, what);
}
EXPORT_SYMBOL(security_locked_down);
#ifdef CONFIG_PERF_EVENTS
int security_perf_event_open(struct perf_event_attr *attr, int type)
{
return call_int_hook(perf_event_open, 0, attr, type);
}
int security_perf_event_alloc(struct perf_event *event)
{
return call_int_hook(perf_event_alloc, 0, event);
}
void security_perf_event_free(struct perf_event *event)
{
call_void_hook(perf_event_free, event);
}
int security_perf_event_read(struct perf_event *event)
{
return call_int_hook(perf_event_read, 0, event);
}
int security_perf_event_write(struct perf_event *event)
{
return call_int_hook(perf_event_write, 0, event);
}
#endif /* CONFIG_PERF_EVENTS */

View File

@ -6795,6 +6795,67 @@ struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = {
.lbs_msg_msg = sizeof(struct msg_security_struct),
};
#ifdef CONFIG_PERF_EVENTS
static int selinux_perf_event_open(struct perf_event_attr *attr, int type)
{
u32 requested, sid = current_sid();
if (type == PERF_SECURITY_OPEN)
requested = PERF_EVENT__OPEN;
else if (type == PERF_SECURITY_CPU)
requested = PERF_EVENT__CPU;
else if (type == PERF_SECURITY_KERNEL)
requested = PERF_EVENT__KERNEL;
else if (type == PERF_SECURITY_TRACEPOINT)
requested = PERF_EVENT__TRACEPOINT;
else
return -EINVAL;
return avc_has_perm(&selinux_state, sid, sid, SECCLASS_PERF_EVENT,
requested, NULL);
}
static int selinux_perf_event_alloc(struct perf_event *event)
{
struct perf_event_security_struct *perfsec;
perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL);
if (!perfsec)
return -ENOMEM;
perfsec->sid = current_sid();
event->security = perfsec;
return 0;
}
static void selinux_perf_event_free(struct perf_event *event)
{
struct perf_event_security_struct *perfsec = event->security;
event->security = NULL;
kfree(perfsec);
}
static int selinux_perf_event_read(struct perf_event *event)
{
struct perf_event_security_struct *perfsec = event->security;
u32 sid = current_sid();
return avc_has_perm(&selinux_state, sid, perfsec->sid,
SECCLASS_PERF_EVENT, PERF_EVENT__READ, NULL);
}
static int selinux_perf_event_write(struct perf_event *event)
{
struct perf_event_security_struct *perfsec = event->security;
u32 sid = current_sid();
return avc_has_perm(&selinux_state, sid, perfsec->sid,
SECCLASS_PERF_EVENT, PERF_EVENT__WRITE, NULL);
}
#endif
static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr),
LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction),
@ -7030,6 +7091,14 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free),
LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free),
#endif
#ifdef CONFIG_PERF_EVENTS
LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open),
LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc),
LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free),
LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read),
LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write),
#endif
};
static __init int selinux_init(void)

View File

@ -244,6 +244,8 @@ struct security_class_mapping secclass_map[] = {
{"map_create", "map_read", "map_write", "prog_load", "prog_run"} },
{ "xdp_socket",
{ COMMON_SOCK_PERMS, NULL } },
{ "perf_event",
{"open", "cpu", "kernel", "tracepoint", "read", "write"} },
{ NULL }
};

View File

@ -141,7 +141,11 @@ struct pkey_security_struct {
};
struct bpf_security_struct {
u32 sid; /*SID of bpf obj creater*/
u32 sid; /* SID of bpf obj creator */
};
struct perf_event_security_struct {
u32 sid; /* SID of perf_event obj creator */
};
extern struct lsm_blob_sizes selinux_blob_sizes;

View File

@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_EMULATE_PREFIX_H
#define _ASM_X86_EMULATE_PREFIX_H
/*
* Virt escape sequences to trigger instruction emulation;
* ideally these would decode to 'whole' instruction and not destroy
* the instruction stream; sadly this is not true for the 'kvm' one :/
*/
#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */
#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */
#endif

View File

@ -45,6 +45,7 @@ struct insn {
struct insn_field immediate2; /* for 64bit imm or seg16 */
};
int emulate_prefix_size;
insn_attr_t attr;
unsigned char opnd_bytes;
unsigned char addr_bytes;
@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn)
return (insn->vex_prefix.nbytes == 4);
}
static inline int insn_has_emulate_prefix(struct insn *insn)
{
return !!insn->emulate_prefix_size;
}
/* Ensure this instruction is decoded completely */
static inline int insn_complete(struct insn *insn)
{

View File

@ -0,0 +1,146 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_IRQ_VECTORS_H
#define _ASM_X86_IRQ_VECTORS_H
#include <linux/threads.h>
/*
* Linux IRQ vector layout.
*
* There are 256 IDT entries (per CPU - each entry is 8 bytes) which can
* be defined by Linux. They are used as a jump table by the CPU when a
* given vector is triggered - by a CPU-external, CPU-internal or
* software-triggered event.
*
* Linux sets the kernel code address each entry jumps to early during
* bootup, and never changes them. This is the general layout of the
* IDT entries:
*
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
* Vectors 129 ... LOCAL_TIMER_VECTOR-1
* Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
* This file enumerates the exact layout of them:
*/
#define NMI_VECTOR 0x02
#define MCE_VECTOR 0x12
/*
* IDT vectors usable for external interrupt sources start at 0x20.
* (0x80 is the syscall vector, 0x30-0x3f are for ISA)
*/
#define FIRST_EXTERNAL_VECTOR 0x20
/*
* Reserve the lowest usable vector (and hence lowest priority) 0x20 for
* triggering cleanup after irq migration. 0x21-0x2f will still be used
* for device interrupts.
*/
#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
#define IA32_SYSCALL_VECTOR 0x80
/*
* Vectors 0x30-0x3f are used for ISA interrupts.
* round up to the next 16-vector boundary
*/
#define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq)
/*
* Special IRQ vectors used by the SMP architecture, 0xf0-0xff
*
* some of the following vectors are 'rare', they are merged
* into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
* TLB, reschedule and local APIC vectors are performance-critical.
*/
#define SPURIOUS_APIC_VECTOR 0xff
/*
* Sanity check
*/
#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F)
# error SPURIOUS_APIC_VECTOR definition error
#endif
#define ERROR_APIC_VECTOR 0xfe
#define RESCHEDULE_VECTOR 0xfd
#define CALL_FUNCTION_VECTOR 0xfc
#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
#define THERMAL_APIC_VECTOR 0xfa
#define THRESHOLD_APIC_VECTOR 0xf9
#define REBOOT_VECTOR 0xf8
/*
* Generic system vector for platform specific use
*/
#define X86_PLATFORM_IPI_VECTOR 0xf7
/*
* IRQ work vector:
*/
#define IRQ_WORK_VECTOR 0xf6
#define UV_BAU_MESSAGE 0xf5
#define DEFERRED_ERROR_VECTOR 0xf4
/* Vector on which hypervisor callbacks will be delivered */
#define HYPERVISOR_CALLBACK_VECTOR 0xf3
/* Vector for KVM to deliver posted interrupt IPI */
#ifdef CONFIG_HAVE_KVM
#define POSTED_INTR_VECTOR 0xf2
#define POSTED_INTR_WAKEUP_VECTOR 0xf1
#define POSTED_INTR_NESTED_VECTOR 0xf0
#endif
#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
#if IS_ENABLED(CONFIG_HYPERV)
#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
#define HYPERV_STIMER0_VECTOR 0xed
#endif
#define LOCAL_TIMER_VECTOR 0xec
#define NR_VECTORS 256
#ifdef CONFIG_X86_LOCAL_APIC
#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR
#else
#define FIRST_SYSTEM_VECTOR NR_VECTORS
#endif
/*
* Size the maximum number of interrupts.
*
* If the irq_desc[] array has a sparse layout, we can size things
* generously - it scales up linearly with the maximum number of CPUs,
* and the maximum number of IO-APICs, whichever is higher.
*
* In other cases we size more conservatively, to not create too large
* static arrays.
*/
#define NR_IRQS_LEGACY 16
#define CPU_VECTOR_LIMIT (64 * NR_CPUS)
#define IO_APIC_VECTOR_LIMIT (32 * MAX_IO_APICS)
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_PCI_MSI)
#define NR_IRQS \
(CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
(NR_VECTORS + CPU_VECTOR_LIMIT) : \
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
#elif defined(CONFIG_X86_IO_APIC)
#define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT)
#elif defined(CONFIG_PCI_MSI)
#define NR_IRQS (NR_VECTORS + CPU_VECTOR_LIMIT)
#else
#define NR_IRQS NR_IRQS_LEGACY
#endif
#endif /* _ASM_X86_IRQ_VECTORS_H */

View File

@ -0,0 +1,857 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_MSR_INDEX_H
#define _ASM_X86_MSR_INDEX_H
#include <linux/bits.h>
/*
* CPU model specific register (MSR) numbers.
*
* Do not add new entries to this file unless the definitions are shared
* between multiple compilation units.
*/
/* x86-64 specific MSRs */
#define MSR_EFER 0xc0000080 /* extended feature register */
#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */
/* EFER bits: */
#define _EFER_SCE 0 /* SYSCALL/SYSRET */
#define _EFER_LME 8 /* Long mode enable */
#define _EFER_LMA 10 /* Long mode active (read-only) */
#define _EFER_NX 11 /* No execute enable */
#define _EFER_SVME 12 /* Enable virtualization */
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
#define EFER_SCE (1<<_EFER_SCE)
#define EFER_LME (1<<_EFER_LME)
#define EFER_LMA (1<<_EFER_LMA)
#define EFER_NX (1<<_EFER_NX)
#define EFER_SVME (1<<_EFER_SVME)
#define EFER_LMSLE (1<<_EFER_LMSLE)
#define EFER_FFXSR (1<<_EFER_FFXSR)
/* Intel MSRs. Some also available on other CPUs */
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
#define MSR_IA32_UMWAIT_CONTROL 0xe1
#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0)
#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1)
/*
* The time field is bit[31:2], but representing a 32bit value with
* bit[1:0] zero.
*/
#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C1_AUTO_UNDEMOTE (1UL << 28)
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
* attack, so no Speculative Store Bypass
* control required.
*/
#define ARCH_CAP_MDS_NO BIT(5) /*
* Not susceptible to
* Microarchitectural Data
* Sampling (MDS) vulnerabilities.
*/
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
* Writeback and invalidate the
* L1 data cache.
*/
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
#define MSR_IA32_MCG_CAP 0x00000179
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b
#define MSR_IA32_MCG_EXT_CTL 0x000004d0
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
#define MSR_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
#define MSR_TURBO_RATIO_LIMIT2 0x000001af
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
#define MSR_LBR_CORE_TO 0x00000060
#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
#define LBR_INFO_MISPRED BIT_ULL(63)
#define LBR_INFO_IN_TX BIT_ULL(62)
#define LBR_INFO_ABORT BIT_ULL(61)
#define LBR_INFO_CYCLES 0xffff
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_PEBS_DATA_CFG 0x000003f2
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
#define RTIT_CTL_CYCLEACC BIT(1)
#define RTIT_CTL_OS BIT(2)
#define RTIT_CTL_USR BIT(3)
#define RTIT_CTL_PWR_EVT_EN BIT(4)
#define RTIT_CTL_FUP_ON_PTW BIT(5)
#define RTIT_CTL_FABRIC_EN BIT(6)
#define RTIT_CTL_CR3EN BIT(7)
#define RTIT_CTL_TOPA BIT(8)
#define RTIT_CTL_MTC_EN BIT(9)
#define RTIT_CTL_TSC_EN BIT(10)
#define RTIT_CTL_DISRETC BIT(11)
#define RTIT_CTL_PTW_EN BIT(12)
#define RTIT_CTL_BRANCH_EN BIT(13)
#define RTIT_CTL_MTC_RANGE_OFFSET 14
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
#define RTIT_CTL_CYC_THRESH_OFFSET 19
#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
#define RTIT_CTL_PSB_FREQ_OFFSET 24
#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
#define RTIT_CTL_ADDR0_OFFSET 32
#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET)
#define RTIT_CTL_ADDR1_OFFSET 36
#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET)
#define RTIT_CTL_ADDR2_OFFSET 40
#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET)
#define RTIT_CTL_ADDR3_OFFSET 44
#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET)
#define MSR_IA32_RTIT_STATUS 0x00000571
#define RTIT_STATUS_FILTEREN BIT(0)
#define RTIT_STATUS_CONTEXTEN BIT(1)
#define RTIT_STATUS_TRIGGEREN BIT(2)
#define RTIT_STATUS_BUFFOVF BIT(3)
#define RTIT_STATUS_ERROR BIT(4)
#define RTIT_STATUS_STOPPED BIT(5)
#define RTIT_STATUS_BYTECNT_OFFSET 32
#define RTIT_STATUS_BYTECNT (0x1ffffull << RTIT_STATUS_BYTECNT_OFFSET)
#define MSR_IA32_RTIT_ADDR0_A 0x00000580
#define MSR_IA32_RTIT_ADDR0_B 0x00000581
#define MSR_IA32_RTIT_ADDR1_A 0x00000582
#define MSR_IA32_RTIT_ADDR1_B 0x00000583
#define MSR_IA32_RTIT_ADDR2_A 0x00000584
#define MSR_IA32_RTIT_ADDR2_B 0x00000585
#define MSR_IA32_RTIT_ADDR3_A 0x00000586
#define MSR_IA32_RTIT_ADDR3_B 0x00000587
#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
#define MSR_MTRRfix64K_00000 0x00000250
#define MSR_MTRRfix16K_80000 0x00000258
#define MSR_MTRRfix16K_A0000 0x00000259
#define MSR_MTRRfix4K_C0000 0x00000268
#define MSR_MTRRfix4K_C8000 0x00000269
#define MSR_MTRRfix4K_D0000 0x0000026a
#define MSR_MTRRfix4K_D8000 0x0000026b
#define MSR_MTRRfix4K_E0000 0x0000026c
#define MSR_MTRRfix4K_E8000 0x0000026d
#define MSR_MTRRfix4K_F0000 0x0000026e
#define MSR_MTRRfix4K_F8000 0x0000026f
#define MSR_MTRRdefType 0x000002ff
#define MSR_IA32_CR_PAT 0x00000277
#define MSR_IA32_DEBUGCTLMSR 0x000001d9
#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
#define DEBUGCTLMSR_BTINT (1UL << 8)
#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
#define MSR_PEBS_FRONTEND 0x000003f7
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403
/* C-state Residency Counters */
#define MSR_PKG_C3_RESIDENCY 0x000003f8
#define MSR_PKG_C6_RESIDENCY 0x000003f9
#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa
#define MSR_PKG_C7_RESIDENCY 0x000003fa
#define MSR_CORE_C3_RESIDENCY 0x000003fc
#define MSR_CORE_C6_RESIDENCY 0x000003fd
#define MSR_CORE_C7_RESIDENCY 0x000003fe
#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
#define MSR_PKG_C2_RESIDENCY 0x0000060d
#define MSR_PKG_C8_RESIDENCY 0x00000630
#define MSR_PKG_C9_RESIDENCY 0x00000631
#define MSR_PKG_C10_RESIDENCY 0x00000632
/* Interrupt Response Limit */
#define MSR_PKGC3_IRTL 0x0000060a
#define MSR_PKGC6_IRTL 0x0000060b
#define MSR_PKGC7_IRTL 0x0000060c
#define MSR_PKGC8_IRTL 0x00000633
#define MSR_PKGC9_IRTL 0x00000634
#define MSR_PKGC10_IRTL 0x00000635
/* Run Time Average Power Limiting (RAPL) Interface */
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
#define MSR_PKG_ENERGY_STATUS 0x00000611
#define MSR_PKG_PERF_STATUS 0x00000613
#define MSR_PKG_POWER_INFO 0x00000614
#define MSR_DRAM_POWER_LIMIT 0x00000618
#define MSR_DRAM_ENERGY_STATUS 0x00000619
#define MSR_DRAM_PERF_STATUS 0x0000061b
#define MSR_DRAM_POWER_INFO 0x0000061c
#define MSR_PP0_POWER_LIMIT 0x00000638
#define MSR_PP0_ENERGY_STATUS 0x00000639
#define MSR_PP0_POLICY 0x0000063a
#define MSR_PP0_PERF_STATUS 0x0000063b
#define MSR_PP1_POWER_LIMIT 0x00000640
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
/* Config TDP MSRs */
#define MSR_CONFIG_TDP_NOMINAL 0x00000648
#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
#define MSR_CONFIG_TDP_CONTROL 0x0000064B
#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
#define MSR_CORE_C1_RES 0x00000660
#define MSR_MODULE_C6_RES_MS 0x00000664
#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669
#define MSR_ATOM_CORE_RATIOS 0x0000066a
#define MSR_ATOM_CORE_VIDS 0x0000066b
#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
/* Hardware P state interface */
#define MSR_PPERF 0x0000064e
#define MSR_PERF_LIMIT_REASONS 0x0000064f
#define MSR_PM_ENABLE 0x00000770
#define MSR_HWP_CAPABILITIES 0x00000771
#define MSR_HWP_REQUEST_PKG 0x00000772
#define MSR_HWP_INTERRUPT 0x00000773
#define MSR_HWP_REQUEST 0x00000774
#define MSR_HWP_STATUS 0x00000777
/* CPUID.6.EAX */
#define HWP_BASE_BIT (1<<7)
#define HWP_NOTIFICATIONS_BIT (1<<8)
#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
/* IA32_HWP_CAPABILITIES */
#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
/* IA32_HWP_REQUEST */
#define HWP_MIN_PERF(x) (x & 0xff)
#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
#define HWP_EPP_PERFORMANCE 0x00
#define HWP_EPP_BALANCE_PERFORMANCE 0x80
#define HWP_EPP_BALANCE_POWERSAVE 0xC0
#define HWP_EPP_POWERSAVE 0xFF
#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
/* IA32_HWP_STATUS */
#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)
/* IA32_HWP_INTERRUPT */
#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)
#define MSR_AMD64_MC0_MASK 0xc0010044
#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
/* These are consecutive and not in the normal 4er MCE bank block */
#define MSR_IA32_MC0_CTL2 0x00000280
#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
#define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186
#define MSR_P6_EVNTSEL1 0x00000187
#define MSR_KNC_PERFCTR0 0x00000020
#define MSR_KNC_PERFCTR1 0x00000021
#define MSR_KNC_EVNTSEL0 0x00000028
#define MSR_KNC_EVNTSEL1 0x00000029
/* Alternative perfctr range with full access. */
#define MSR_IA32_PMC0 0x000004c1
/* Auto-reload via MSR instead of DS area */
#define MSR_RELOAD_PMC0 0x000014c1
#define MSR_RELOAD_FIXED_CTR0 0x00001309
/*
* AMD64 MSRs. Not complete. See the architecture manual for a more
* complete list.
*/
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_AMD_PERF_STATUS 0xc0010063
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
#define MSR_AMD64_IBSFETCH_REG_COUNT 3
#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
#define MSR_AMD64_IBSOPCTL 0xc0011033
#define MSR_AMD64_IBSOPRIP 0xc0011034
#define MSR_AMD64_IBSOPDATA 0xc0011035
#define MSR_AMD64_IBSOPDATA2 0xc0011036
#define MSR_AMD64_IBSOPDATA3 0xc0011037
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
#define MSR_AMD64_IBSOP_REG_COUNT 7
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL
#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2)
#define MSR_F15H_PERF_CTL2 (MSR_F15H_PERF_CTL + 4)
#define MSR_F15H_PERF_CTL3 (MSR_F15H_PERF_CTL + 6)
#define MSR_F15H_PERF_CTL4 (MSR_F15H_PERF_CTL + 8)
#define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10)
#define MSR_F15H_PERF_CTR 0xc0010201
#define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR
#define MSR_F15H_PERF_CTR1 (MSR_F15H_PERF_CTR + 2)
#define MSR_F15H_PERF_CTR2 (MSR_F15H_PERF_CTR + 4)
#define MSR_F15H_PERF_CTR3 (MSR_F15H_PERF_CTR + 6)
#define MSR_F15H_PERF_CTR4 (MSR_F15H_PERF_CTR + 8)
#define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10)
#define MSR_F15H_NB_PERF_CTL 0xc0010240
#define MSR_F15H_NB_PERF_CTR 0xc0010241
#define MSR_F15H_PTSC 0xc0010280
#define MSR_F15H_IC_CFG 0xc0011021
#define MSR_F15H_EX_CFG 0xc001102c
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
#define FAM10H_MMIO_CONF_ENABLE (1<<0)
#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
#define MSR_F10H_DECFG 0xc0011029
#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
#define MSR_K8_SYSCFG 0xc0010010
#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
#define MSR_K8_TSEG_ADDR 0xc0010112
#define MSR_K8_TSEG_MASK 0xc0010113
#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
/* K7 MSRs */
#define MSR_K7_EVNTSEL0 0xc0010000
#define MSR_K7_PERFCTR0 0xc0010004
#define MSR_K7_EVNTSEL1 0xc0010001
#define MSR_K7_PERFCTR1 0xc0010005
#define MSR_K7_EVNTSEL2 0xc0010002
#define MSR_K7_PERFCTR2 0xc0010006
#define MSR_K7_EVNTSEL3 0xc0010003
#define MSR_K7_PERFCTR3 0xc0010007
#define MSR_K7_CLK_CTL 0xc001001b
#define MSR_K7_HWCR 0xc0010015
#define MSR_K7_HWCR_SMMLOCK_BIT 0
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
/* K6 MSRs */
#define MSR_K6_WHCR 0xc0000082
#define MSR_K6_UWCCR 0xc0000085
#define MSR_K6_EPMR 0xc0000086
#define MSR_K6_PSOR 0xc0000087
#define MSR_K6_PFIR 0xc0000088
/* Centaur-Hauls/IDT defined MSRs. */
#define MSR_IDT_FCR1 0x00000107
#define MSR_IDT_FCR2 0x00000108
#define MSR_IDT_FCR3 0x00000109
#define MSR_IDT_FCR4 0x0000010a
#define MSR_IDT_MCR0 0x00000110
#define MSR_IDT_MCR1 0x00000111
#define MSR_IDT_MCR2 0x00000112
#define MSR_IDT_MCR3 0x00000113
#define MSR_IDT_MCR4 0x00000114
#define MSR_IDT_MCR5 0x00000115
#define MSR_IDT_MCR6 0x00000116
#define MSR_IDT_MCR7 0x00000117
#define MSR_IDT_MCR_CTRL 0x00000120
/* VIA Cyrix defined MSRs*/
#define MSR_VIA_FCR 0x00001107
#define MSR_VIA_LONGHAUL 0x0000110a
#define MSR_VIA_RNG 0x0000110b
#define MSR_VIA_BCR2 0x00001147
/* Transmeta defined MSRs */
#define MSR_TMTA_LONGRUN_CTRL 0x80868010
#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
#define MSR_TMTA_LRTI_READOUT 0x80868018
#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
/* Intel defined MSRs. */
#define MSR_IA32_P5_MC_ADDR 0x00000000
#define MSR_IA32_P5_MC_TYPE 0x00000001
#define MSR_IA32_TSC 0x00000010
#define MSR_IA32_PLATFORM_ID 0x00000017
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_EBC_FREQUENCY_ID 0x0000002c
#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
#define MSR_IA32_BNDCFGS 0x00000d90
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
#define MSR_IA32_XSS 0x00000da0
#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
#define FEATURE_CONTROL_LMCE (1<<20)
#define MSR_IA32_APICBASE 0x0000001b
#define MSR_IA32_APICBASE_BSP (1<<8)
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
#define MSR_IA32_TSCDEADLINE 0x000006e0
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
#define MSR_IA32_SMBASE 0x0000009e
#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
#define MSR_IA32_THERM_CONTROL 0x0000019a
#define MSR_IA32_THERM_INTERRUPT 0x0000019b
#define THERM_INT_HIGH_ENABLE (1 << 0)
#define THERM_INT_LOW_ENABLE (1 << 1)
#define THERM_INT_PLN_ENABLE (1 << 24)
#define MSR_IA32_THERM_STATUS 0x0000019c
#define THERM_STATUS_PROCHOT (1 << 0)
#define THERM_STATUS_POWER_LIMIT (1 << 10)
#define MSR_THERM2_CTL 0x0000019d
#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
#define MSR_IA32_MISC_ENABLE 0x000001a0
#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2
#define MSR_MISC_FEATURE_CONTROL 0x000001a4
#define MSR_MISC_PWR_MGMT 0x000001aa
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
#define ENERGY_PERF_BIAS_PERFORMANCE 0
#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
#define ENERGY_PERF_BIAS_NORMAL 6
#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
#define ENERGY_PERF_BIAS_POWERSAVE 15
#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1
#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
/* Thermal Thresholds Support */
#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
#define THERM_SHIFT_THRESHOLD0 8
#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
#define THERM_SHIFT_THRESHOLD1 16
#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
#define THERM_STATUS_THRESHOLD0 (1 << 6)
#define THERM_LOG_THRESHOLD0 (1 << 7)
#define THERM_STATUS_THRESHOLD1 (1 << 8)
#define THERM_LOG_THRESHOLD1 (1 << 9)
/* MISC_ENABLE bits: architectural */
#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
#define MSR_IA32_MISC_ENABLE_TCC_BIT 1
#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
#define MSR_IA32_MISC_ENABLE_EMON_BIT 7
#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11
#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12
#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16
#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2
#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
#define MSR_IA32_MISC_ENABLE_TM1_BIT 3
#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4
#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6
#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8
#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9
#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_FERR_BIT 10
#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10
#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
#define MSR_IA32_MISC_ENABLE_TM2_BIT 13
#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19
#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20
#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24
#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37
#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38
#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
/* MISC_FEATURES_ENABLES non-architectural features */
#define MSR_MISC_FEATURES_ENABLES 0x00000140
#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
#define MSR_IA32_TSC_DEADLINE 0x000006E0
#define MSR_TSX_FORCE_ABORT 0x0000010F
#define MSR_TFA_RTM_FORCE_ABORT_BIT 0
#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
/* P4/Xeon+ specific */
#define MSR_IA32_MCG_EAX 0x00000180
#define MSR_IA32_MCG_EBX 0x00000181
#define MSR_IA32_MCG_ECX 0x00000182
#define MSR_IA32_MCG_EDX 0x00000183
#define MSR_IA32_MCG_ESI 0x00000184
#define MSR_IA32_MCG_EDI 0x00000185
#define MSR_IA32_MCG_EBP 0x00000186
#define MSR_IA32_MCG_ESP 0x00000187
#define MSR_IA32_MCG_EFLAGS 0x00000188
#define MSR_IA32_MCG_EIP 0x00000189
#define MSR_IA32_MCG_RESERVED 0x0000018a
/* Pentium IV performance counter MSRs */
#define MSR_P4_BPU_PERFCTR0 0x00000300
#define MSR_P4_BPU_PERFCTR1 0x00000301
#define MSR_P4_BPU_PERFCTR2 0x00000302
#define MSR_P4_BPU_PERFCTR3 0x00000303
#define MSR_P4_MS_PERFCTR0 0x00000304
#define MSR_P4_MS_PERFCTR1 0x00000305
#define MSR_P4_MS_PERFCTR2 0x00000306
#define MSR_P4_MS_PERFCTR3 0x00000307
#define MSR_P4_FLAME_PERFCTR0 0x00000308
#define MSR_P4_FLAME_PERFCTR1 0x00000309
#define MSR_P4_FLAME_PERFCTR2 0x0000030a
#define MSR_P4_FLAME_PERFCTR3 0x0000030b
#define MSR_P4_IQ_PERFCTR0 0x0000030c
#define MSR_P4_IQ_PERFCTR1 0x0000030d
#define MSR_P4_IQ_PERFCTR2 0x0000030e
#define MSR_P4_IQ_PERFCTR3 0x0000030f
#define MSR_P4_IQ_PERFCTR4 0x00000310
#define MSR_P4_IQ_PERFCTR5 0x00000311
#define MSR_P4_BPU_CCCR0 0x00000360
#define MSR_P4_BPU_CCCR1 0x00000361
#define MSR_P4_BPU_CCCR2 0x00000362
#define MSR_P4_BPU_CCCR3 0x00000363
#define MSR_P4_MS_CCCR0 0x00000364
#define MSR_P4_MS_CCCR1 0x00000365
#define MSR_P4_MS_CCCR2 0x00000366
#define MSR_P4_MS_CCCR3 0x00000367
#define MSR_P4_FLAME_CCCR0 0x00000368
#define MSR_P4_FLAME_CCCR1 0x00000369
#define MSR_P4_FLAME_CCCR2 0x0000036a
#define MSR_P4_FLAME_CCCR3 0x0000036b
#define MSR_P4_IQ_CCCR0 0x0000036c
#define MSR_P4_IQ_CCCR1 0x0000036d
#define MSR_P4_IQ_CCCR2 0x0000036e
#define MSR_P4_IQ_CCCR3 0x0000036f
#define MSR_P4_IQ_CCCR4 0x00000370
#define MSR_P4_IQ_CCCR5 0x00000371
#define MSR_P4_ALF_ESCR0 0x000003ca
#define MSR_P4_ALF_ESCR1 0x000003cb
#define MSR_P4_BPU_ESCR0 0x000003b2
#define MSR_P4_BPU_ESCR1 0x000003b3
#define MSR_P4_BSU_ESCR0 0x000003a0
#define MSR_P4_BSU_ESCR1 0x000003a1
#define MSR_P4_CRU_ESCR0 0x000003b8
#define MSR_P4_CRU_ESCR1 0x000003b9
#define MSR_P4_CRU_ESCR2 0x000003cc
#define MSR_P4_CRU_ESCR3 0x000003cd
#define MSR_P4_CRU_ESCR4 0x000003e0
#define MSR_P4_CRU_ESCR5 0x000003e1
#define MSR_P4_DAC_ESCR0 0x000003a8
#define MSR_P4_DAC_ESCR1 0x000003a9
#define MSR_P4_FIRM_ESCR0 0x000003a4
#define MSR_P4_FIRM_ESCR1 0x000003a5
#define MSR_P4_FLAME_ESCR0 0x000003a6
#define MSR_P4_FLAME_ESCR1 0x000003a7
#define MSR_P4_FSB_ESCR0 0x000003a2
#define MSR_P4_FSB_ESCR1 0x000003a3
#define MSR_P4_IQ_ESCR0 0x000003ba
#define MSR_P4_IQ_ESCR1 0x000003bb
#define MSR_P4_IS_ESCR0 0x000003b4
#define MSR_P4_IS_ESCR1 0x000003b5
#define MSR_P4_ITLB_ESCR0 0x000003b6
#define MSR_P4_ITLB_ESCR1 0x000003b7
#define MSR_P4_IX_ESCR0 0x000003c8
#define MSR_P4_IX_ESCR1 0x000003c9
#define MSR_P4_MOB_ESCR0 0x000003aa
#define MSR_P4_MOB_ESCR1 0x000003ab
#define MSR_P4_MS_ESCR0 0x000003c0
#define MSR_P4_MS_ESCR1 0x000003c1
#define MSR_P4_PMH_ESCR0 0x000003ac
#define MSR_P4_PMH_ESCR1 0x000003ad
#define MSR_P4_RAT_ESCR0 0x000003bc
#define MSR_P4_RAT_ESCR1 0x000003bd
#define MSR_P4_SAAT_ESCR0 0x000003ae
#define MSR_P4_SAAT_ESCR1 0x000003af
#define MSR_P4_SSU_ESCR0 0x000003be
#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */
#define MSR_P4_TBPU_ESCR0 0x000003c2
#define MSR_P4_TBPU_ESCR1 0x000003c3
#define MSR_P4_TC_ESCR0 0x000003c4
#define MSR_P4_TC_ESCR1 0x000003c5
#define MSR_P4_U2L_ESCR0 0x000003b0
#define MSR_P4_U2L_ESCR1 0x000003b1
#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
/* Intel Core-based CPU performance counters */
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
/* PERF_GLOBAL_OVF_CTL bits */
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT)
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT)
/* Geode defined MSRs */
#define MSR_GEODE_BUSCONT_CONF0 0x00001900
/* Intel VT MSRs */
#define MSR_IA32_VMX_BASIC 0x00000480
#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
#define MSR_IA32_VMX_MISC 0x00000485
#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
#define MSR_IA32_VMX_VMFUNC 0x00000491
/* VMX_BASIC bits and bitmasks */
#define VMX_BASIC_VMCS_SIZE_SHIFT 32
#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
#define VMX_BASIC_64 0x0001000000000000LLU
#define VMX_BASIC_MEM_TYPE_SHIFT 50
#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
#define VMX_BASIC_MEM_TYPE_WB 6LLU
#define VMX_BASIC_INOUT 0x0040000000000000LLU
/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114
#define MSR_VM_IGNNE 0xc0010115
#define MSR_VM_HSAVE_PA 0xc0010117
#endif /* _ASM_X86_MSR_INDEX_H */

View File

@ -13,6 +13,8 @@
#include "../include/asm/inat.h"
#include "../include/asm/insn.h"
#include "../include/asm/emulate_prefix.h"
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
@ -58,6 +60,36 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
insn->addr_bytes = 4;
}
static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX };
static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX };
static int __insn_get_emulate_prefix(struct insn *insn,
const insn_byte_t *prefix, size_t len)
{
size_t i;
for (i = 0; i < len; i++) {
if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i])
goto err_out;
}
insn->emulate_prefix_size = len;
insn->next_byte += len;
return 1;
err_out:
return 0;
}
static void insn_get_emulate_prefix(struct insn *insn)
{
if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix)))
return;
__insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix));
}
/**
* insn_get_prefixes - scan x86 instruction prefix bytes
* @insn: &struct insn containing instruction
@ -76,6 +108,8 @@ void insn_get_prefixes(struct insn *insn)
if (prefixes->got)
return;
insn_get_emulate_prefix(insn);
nb = 0;
lb = 0;
b = peek_next(insn_byte_t, insn);

View File

@ -333,7 +333,7 @@ AVXcode: 1
06: CLTS
07: SYSRET (o64)
08: INVD
09: WBINVD
09: WBINVD | WBNOINVD (F3)
0a:
0b: UD2 (1B)
0c:
@ -364,7 +364,7 @@ AVXcode: 1
# a ModR/M byte.
1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
1c:
1c: Grp20 (1A),(1C)
1d:
1e:
1f: NOP Ev
@ -792,6 +792,8 @@ f3: Grp17 (1A)
f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
f9: MOVDIRI My,Gy
EndTable
Table: 3-byte opcode 2 (0x0f 0x3a)
@ -943,9 +945,9 @@ GrpTable: Grp6
EndTable
GrpTable: Grp7
0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B)
1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B)
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
5: rdpkru (110),(11B) | wrpkru (111),(11B)
@ -1020,7 +1022,7 @@ GrpTable: Grp15
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE | ptwrite Ey (F3),(11B)
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
7: clflush | clflushopt (66) | sfence (11B)
EndTable
@ -1051,6 +1053,10 @@ GrpTable: Grp19
6: vscatterpf1qps/d Wx (66),(ev)
EndTable
GrpTable: Grp20
0: cldemote Mb
EndTable
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH

View File

@ -141,8 +141,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_AUX = 1U << 20,
PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
@ -300,6 +301,7 @@ enum perf_event_read_format {
/* add: sample_stack_user */
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
/*
* Hardware event_id to monitor via a performance monitoring event:
@ -424,7 +426,9 @@ struct perf_event_attr {
*/
__u32 aux_watermark;
__u16 sample_max_stack;
__u16 __reserved_2; /* align to __u64 */
__u16 __reserved_2;
__u32 aux_sample_size;
__u32 __reserved_3;
};
/*
@ -864,6 +868,8 @@ enum perf_event_type {
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
* { u64 size;
* char data[size]; } && PERF_SAMPLE_AUX
* };
*/
PERF_RECORD_SAMPLE = 9,

View File

@ -19,8 +19,7 @@ MAKEFLAGS += --no-print-directory
LIBFILE = $(OUTPUT)libsubcmd.a
CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -fPIC
CFLAGS := -ggdb3 -Wall -Wextra -std=gnu99 -fPIC
ifeq ($(DEBUG),0)
ifeq ($(feature-fortify-source), 1)
@ -28,7 +27,9 @@ ifeq ($(DEBUG),0)
endif
endif
ifeq ($(CC_NO_CLANG), 0)
ifeq ($(DEBUG),1)
CFLAGS += -O0
else ifeq ($(CC_NO_CLANG), 0)
CFLAGS += -O3
else
CFLAGS += -O6
@ -43,6 +44,8 @@ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
CFLAGS += -I$(srctree)/tools/include/
CFLAGS += $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
SUBCMD_IN := $(OUTPUT)libsubcmd-in.o
all:

View File

@ -232,10 +232,10 @@ install_pkgconfig:
install_headers:
$(call QUIET_INSTALL, headers) \
$(call do_install,event-parse.h,$(DESTDIR)$(includedir_SQ),644); \
$(call do_install,event-utils.h,$(DESTDIR)$(includedir_SQ),644); \
$(call do_install,trace-seq.h,$(DESTDIR)$(includedir_SQ),644); \
$(call do_install,kbuffer.h,$(DESTDIR)$(includedir_SQ),644)
$(call do_install,event-parse.h,$(includedir_SQ),644); \
$(call do_install,event-utils.h,$(includedir_SQ),644); \
$(call do_install,trace-seq.h,$(includedir_SQ),644); \
$(call do_install,kbuffer.h,$(includedir_SQ),644)
install: install_lib

View File

@ -4395,8 +4395,10 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s
/* fall through */
case 'd':
case 'u':
case 'x':
case 'i':
case 'x':
case 'X':
case 'o':
switch (ls) {
case 0:
vsize = 4;
@ -5078,10 +5080,11 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e
/* fall through */
case 'd':
case 'u':
case 'i':
case 'x':
case 'X':
case 'u':
case 'o':
if (!arg) {
do_warning_event(event, "no argument match");
event->flags |= TEP_EVENT_FL_FAILED;

View File

@ -1473,8 +1473,10 @@ static int copy_filter_type(struct tep_event_filter *filter,
if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) {
/* Add trivial event */
arg = allocate_arg();
if (arg == NULL)
if (arg == NULL) {
free(str);
return -1;
}
arg->type = TEP_FILTER_ARG_BOOLEAN;
if (strcmp(str, "TRUE") == 0)
@ -1483,8 +1485,11 @@ static int copy_filter_type(struct tep_event_filter *filter,
arg->boolean.value = 0;
filter_type = add_filter_type(filter, event->id);
if (filter_type == NULL)
if (filter_type == NULL) {
free(str);
free_arg(arg);
return -1;
}
filter_type->filter = arg;

View File

@ -4,6 +4,7 @@
FILES='
arch/x86/include/asm/inat_types.h
arch/x86/include/asm/orc_types.h
arch/x86/include/asm/emulate_prefix.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
'
@ -46,6 +47,6 @@ done
check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]"'
check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
cd -

View File

@ -434,6 +434,56 @@ pwr_evt Enable power events. The power events provide information about
"0" otherwise.
AUX area sampling option
------------------------
To select Intel PT "sampling" the AUX area sampling option can be used:
--aux-sample
Optionally it can be followed by the sample size in bytes e.g.
--aux-sample=8192
In addition, the Intel PT event to sample must be defined e.g.
-e intel_pt//u
Samples on other events will be created containing Intel PT data e.g. the
following will create Intel PT samples on the branch-misses event, note the
events must be grouped using {}:
perf record --aux-sample -e '{intel_pt//u,branch-misses:u}'
An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to
events. In this case, the grouping is implied e.g.
perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u
is the same as:
perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}'
but allows for also using an address filter e.g.:
perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls
It is important to select a sample size that is big enough to contain at least
one PSB packet. If not a warning will be displayed:
Intel PT sample size (%zu) may be too small for PSB period (%zu)
The calculation used for that is: if sample_size <= psb_period + 256 display the
warning. When sampling is used, psb_period defaults to 0 (2KiB).
The default sample size is 4KiB.
The sample size is passed in aux_sample_size in struct perf_event_attr. The
sample size is limited by the maximum event size which is 64KiB. It is
difficult to know how big the event might be without the trace sample attached,
but the tool validates that the sample size is not greater than 60KiB.
new snapshot option
-------------------
@ -487,8 +537,8 @@ their mlock limit (which defaults to 64KiB but is not multiplied by the number
of cpus).
In full-trace mode, powers of two are allowed for buffer size, with a minimum
size of 2 pages. In snapshot mode, it is the same but the minimum size is
1 page.
size of 2 pages. In snapshot mode or sampling mode, it is the same but the
minimum size is 1 page.
The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
@ -501,12 +551,17 @@ Intel PT modes of operation
Intel PT can be used in 2 modes:
full-trace mode
sample mode
snapshot mode
Full-trace mode traces continuously e.g.
perf record -e intel_pt//u uname
Sample mode attaches a Intel PT sample to other events e.g.
perf record --aux-sample -e intel_pt//u -e branch-misses:u
Snapshot mode captures the available data when a signal is sent e.g.
perf record -v -e intel_pt//u -S ./loopy 1000000000 &

View File

@ -561,6 +561,11 @@ trace.*::
trace.show_zeros::
Do not suppress syscall arguments that are equal to zero.
trace.tracepoint_beautifiers::
Use "libtraceevent" to use that library to augment the tracepoint arguments,
"libbeauty", the default, to use the same argument beautifiers used in the
strace-like sys_enter+sys_exit lines.
llvm.*::
llvm.clang-path::
Path to clang. If omit, search it from $PATH.

View File

@ -95,6 +95,11 @@ OPTIONS
diff.compute config option. See COMPARISON METHODS section for
more info.
--cycles-hist::
Report a histogram and the standard deviation for cycles data.
It can help us to judge if the reported cycles data is noisy or
not. This option should be used with '-c cycles'.
-p::
--period::
Show period values for both compared hist entries.

View File

@ -36,6 +36,9 @@ Enable debugging output.
Print how named events are resolved internally into perf events, and also
any extra expressions computed by perf stat.
--deprecated::
Print deprecated events. By default the deprecated events are hidden.
[[EVENT_MODIFIERS]]
EVENT MODIFIERS
---------------

View File

@ -62,6 +62,9 @@ OPTIONS
like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'.
- 'aux-output': Generate AUX records instead of events. This requires
that an AUX area event is also provided.
- 'aux-sample-size': Set sample size for AUX area sampling. If the
'--aux-sample' option has been used, set aux-sample-size=0 to disable
AUX area sampling for the event.
See the linkperf:perf-list[1] man page for more parameters.
@ -433,6 +436,12 @@ can be specified in a string that follows this option:
In Snapshot Mode trace data is captured only when signal SIGUSR2 is received
and on exit if the above 'e' option is given.
--aux-sample[=OPTIONS]::
Select AUX area sampling. At least one of the events selected by the -e option
must be an AUX area event. Samples on other events will be created containing
data from the AUX area. Optionally sample size may be specified, otherwise it
defaults to 4KiB.
--proc-map-timeout::
When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
because the file may be huge. A time out is needed in such cases.
@ -571,6 +580,13 @@ config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'.
Implies --tail-synthesize.
--kcore::
Make a copy of /proc/kcore and place it into a directory with the perf data file.
--max-size=<size>::
Limit the sample data max size, <size> is expected to be a number with
appended unit character - B/K/M/G
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]

View File

@ -525,6 +525,17 @@ include::itrace.txt[]
Configure time quantum for time sort key. Default 100ms.
Accepts s, us, ms, ns units.
--total-cycles::
When --total-cycles is specified, it supports sorting for all blocks by
'Sampled Cycles%'. This is useful to concentrate on the globally hottest
blocks. In output, there are some new columns:
'Sampled Cycles%' - block sampled cycles aggregation / total sampled cycles
'Sampled Cycles' - block sampled cycles aggregation
'Avg Cycles%' - block average sampled cycles / sum of total block average
sampled cycles
'Avg Cycles' - block average sampled cycles
include::callchain-overhead-calculation.txt[]
SEE ALSO

View File

@ -217,6 +217,11 @@ core number and the number of online logical processors on that physical process
Aggregate counts per monitored threads, when monitoring threads (-t option)
or processes (-p option).
--per-node::
Aggregate counts per NUMA nodes for system-wide mode measurements. This
is a useful mode to detect imbalance between NUMA nodes. To enable this
mode, use --per-node in addition to -a. (system-wide).
-D msecs::
--delay msecs::
After starting the program, wait msecs before measuring. This is useful to
@ -323,6 +328,12 @@ The output is SMI cycles%, equals to (aperf - unhalted core cycles) / aperf
Users who wants to get the actual value can apply --no-metric-only.
--all-kernel::
Configure all used events to run in kernel space.
--all-user::
Configure all used events to run in user space.
EXAMPLES
--------

View File

@ -42,6 +42,11 @@ OPTIONS
Prefixing with ! shows all syscalls but the ones specified. You may
need to escape it.
--filter=<filter>::
Event filter. This option should follow an event selector (-e) which
selects tracepoint event(s).
-D msecs::
--delay msecs::
After starting the program, wait msecs before measuring. This is useful to
@ -141,6 +146,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
Show all syscalls followed by a summary by thread with min, max, and
average times (in msec) and relative stddev.
--errno-summary::
To be used with -s or -S, to show stats for the errnos experienced by
syscalls, using only this option will trigger --summary.
--tool_stats::
Show tool stats such as number of times fd->pathname was discovered thru
hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
@ -219,6 +228,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
may happen, for instance, when a thread gets migrated to a different CPU
while processing a syscall.
--libtraceevent_print::
Use libtraceevent to print tracepoint arguments. By default 'perf trace' uses
the same beautifiers used in the strace-like enter+exit lines to augment the
tracepoint arguments.
--map-dump::
Dump BPF maps setup by events passed via -e, for instance the augmented_raw_syscalls
living in tools/perf/examples/bpf/augmented_raw_syscalls.c. For now this

View File

@ -0,0 +1,63 @@
perf.data directory format
DISCLAIMER This is not ABI yet and is subject to possible change
in following versions of perf. We will remove this
disclaimer once the directory format soaks in.
This document describes the on-disk perf.data directory format.
The layout is described by HEADER_DIR_FORMAT feature.
Currently it holds only version number (0):
HEADER_DIR_FORMAT = 24
struct {
uint64_t version;
}
The current only version value 0 means that:
- there is a single perf.data file named 'data' within the directory.
e.g.
$ tree -ps perf.data
perf.data
└── [-rw------- 25912] data
Future versions are expected to describe different data files
layout according to special needs.
Currently the only 'perf record' option to output to a directory is
the --kcore option which puts a copy of /proc/kcore into the directory.
e.g.
$ sudo perf record --kcore uname
Linux
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.015 MB perf.data (9 samples) ]
$ sudo tree -ps perf.data
perf.data
├── [-rw------- 23744] data
└── [drwx------ 4096] kcore_dir
├── [-r-------- 6731125] kallsyms
├── [-r-------- 40230912] kcore
└── [-r-------- 5419] modules
1 directory, 4 files
$ sudo perf script -v
build id event received for vmlinux: 1eaa285996affce2d74d8e66dcea09a80c9941de
build id event received for [vdso]: 8bbaf5dc62a9b644b4d4e4539737e104e4a84541
build id event received for /lib/x86_64-linux-gnu/libc-2.28.so: 5b157f49586a3ca84d55837f97ff466767dd3445
Samples for 'cycles' event do not have CPU attribute set. Skipping 'cpu' field.
Using CPUID GenuineIntel-6-8E-A
Using perf.data/kcore_dir/kcore for kernel data
Using perf.data/kcore_dir/kallsyms for symbols
perf 15316 2060795.480902: 1 cycles: ffffffffa2caa548 native_write_msr+0x8 (vmlinux)
perf 15316 2060795.480906: 1 cycles: ffffffffa2caa548 native_write_msr+0x8 (vmlinux)
perf 15316 2060795.480908: 7 cycles: ffffffffa2caa548 native_write_msr+0x8 (vmlinux)
perf 15316 2060795.480910: 119 cycles: ffffffffa2caa54a native_write_msr+0xa (vmlinux)
perf 15316 2060795.480912: 2109 cycles: ffffffffa2c9b7b0 native_apic_msr_write+0x0 (vmlinux)
perf 15316 2060795.480914: 37606 cycles: ffffffffa2f121fe perf_event_addr_filters_exec+0x2e (vmlinux)
uname 15316 2060795.480924: 588287 cycles: ffffffffa303a56d page_counter_try_charge+0x6d (vmlinux)
uname 15316 2060795.481067: 2261945 cycles: ffffffffa301438f kmem_cache_free+0x4f (vmlinux)
uname 15316 2060795.481643: 2172167 cycles: 7f1a48c393c0 _IO_un_link+0x0 (/lib/x86_64-linux-gnu/libc-2.28.so)

View File

@ -24,6 +24,8 @@ OPTIONS
data-convert - data convert command debug messages
stderr - write debug output (option -v) to stderr
in browser mode
perf-event-open - Print perf_event_open() arguments and
return value
--buildid-dir::
Setup buildid cache directory. It has higher priority than

View File

@ -188,7 +188,7 @@ endif
# Treat warnings as errors unless directed not to
ifneq ($(WERROR),0)
CFLAGS += -Werror
CORE_CFLAGS += -Werror
CXXFLAGS += -Werror
endif
@ -198,9 +198,9 @@ endif
ifeq ($(DEBUG),0)
ifeq ($(CC_NO_CLANG), 0)
CFLAGS += -O3
CORE_CFLAGS += -O3
else
CFLAGS += -O6
CORE_CFLAGS += -O6
endif
endif
@ -245,12 +245,12 @@ FEATURE_CHECK_LDFLAGS-libaio = -lrt
FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
CFLAGS += -fno-omit-frame-pointer
CFLAGS += -ggdb3
CFLAGS += -funwind-tables
CFLAGS += -Wall
CFLAGS += -Wextra
CFLAGS += -std=gnu99
CORE_CFLAGS += -fno-omit-frame-pointer
CORE_CFLAGS += -ggdb3
CORE_CFLAGS += -funwind-tables
CORE_CFLAGS += -Wall
CORE_CFLAGS += -Wextra
CORE_CFLAGS += -std=gnu99
CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti
CXXFLAGS += -Wall
@ -265,6 +265,11 @@ LDFLAGS += -Wl,-z,noexecstack
EXTLIBS = -lpthread -lrt -lm -ldl
ifneq ($(TCMALLOC),)
CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
EXTLIBS += -ltcmalloc
endif
ifeq ($(FEATURES_DUMP),)
include $(srctree)/tools/build/Makefile.feature
else
@ -272,12 +277,12 @@ include $(FEATURES_DUMP)
endif
ifeq ($(feature-stackprotector-all), 1)
CFLAGS += -fstack-protector-all
CORE_CFLAGS += -fstack-protector-all
endif
ifeq ($(DEBUG),0)
ifeq ($(feature-fortify-source), 1)
CFLAGS += -D_FORTIFY_SOURCE=2
CORE_CFLAGS += -D_FORTIFY_SOURCE=2
endif
endif
@ -301,10 +306,12 @@ INC_FLAGS += -I$(src-perf)/util
INC_FLAGS += -I$(src-perf)
INC_FLAGS += -I$(srctree)/tools/lib/
CFLAGS += $(INC_FLAGS)
CORE_CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
CFLAGS += $(CORE_CFLAGS) $(INC_FLAGS)
CXXFLAGS += $(INC_FLAGS)
CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
LIBPERF_CFLAGS := $(CORE_CFLAGS) $(EXTRA_CFLAGS)
ifeq ($(feature-sync-compare-and-swap), 1)
CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT

View File

@ -114,6 +114,8 @@ include ../scripts/utilities.mak
# Define NO_LIBZSTD if you do not want support of Zstandard based runtime
# trace compression in record mode.
#
# Define TCMALLOC to enable tcmalloc heap profiling.
#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
@ -407,6 +409,7 @@ linux_uapi_dir := $(srctree)/tools/include/uapi/linux
asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
x86_arch_asm_uapi_dir := $(srctree)/tools/arch/x86/include/uapi/asm/
x86_arch_asm_dir := $(srctree)/tools/arch/x86/include/asm/
beauty_outdir := $(OUTPUT)trace/beauty/generated
beauty_ioctl_outdir := $(beauty_outdir)/ioctl
@ -543,6 +546,18 @@ x86_arch_prctl_code_tbl := $(srctree)/tools/perf/trace/beauty/x86_arch_prctl.sh
$(x86_arch_prctl_code_array): $(x86_arch_asm_uapi_dir)/prctl.h $(x86_arch_prctl_code_tbl)
$(Q)$(SHELL) '$(x86_arch_prctl_code_tbl)' $(x86_arch_asm_uapi_dir) > $@
x86_arch_irq_vectors_array := $(beauty_outdir)/x86_arch_irq_vectors_array.c
x86_arch_irq_vectors_tbl := $(srctree)/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh
$(x86_arch_irq_vectors_array): $(x86_arch_asm_dir)/irq_vectors.h $(x86_arch_irq_vectors_tbl)
$(Q)$(SHELL) '$(x86_arch_irq_vectors_tbl)' $(x86_arch_asm_dir) > $@
x86_arch_MSRs_array := $(beauty_outdir)/x86_arch_MSRs_array.c
x86_arch_MSRs_tbl := $(srctree)/tools/perf/trace/beauty/tracepoints/x86_msr.sh
$(x86_arch_MSRs_array): $(x86_arch_asm_dir)/msr-index.h $(x86_arch_MSRs_tbl)
$(Q)$(SHELL) '$(x86_arch_MSRs_tbl)' $(x86_arch_asm_dir) > $@
rename_flags_array := $(beauty_outdir)/rename_flags_array.c
rename_flags_tbl := $(srctree)/tools/perf/trace/beauty/rename_flags.sh
@ -677,6 +692,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(perf_ioctl_array) \
$(prctl_option_array) \
$(usbdevfs_ioctl_array) \
$(x86_arch_irq_vectors_array) \
$(x86_arch_MSRs_array) \
$(x86_arch_prctl_code_array) \
$(rename_flags_array) \
$(arch_errno_name_array) \
@ -761,7 +778,7 @@ $(LIBBPF)-clean:
$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
$(LIBPERF): FORCE
$(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) $(OUTPUT)libperf.a
$(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a
$(LIBPERF)-clean:
$(call QUIET_CLEAN, libperf)
@ -981,6 +998,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \
$(OUTPUT)$(usbdevfs_ioctl_array) \
$(OUTPUT)$(x86_arch_irq_vectors_array) \
$(OUTPUT)$(x86_arch_MSRs_array) \
$(OUTPUT)$(x86_arch_prctl_code_array) \
$(OUTPUT)$(rename_flags_array) \
$(OUTPUT)$(arch_errno_name_array) \

View File

@ -1,3 +1,5 @@
perf-y += perf_regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o

View File

@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};

View File

@ -1,4 +1,5 @@
perf-y += header.o
perf-y += perf_regs.o
perf-y += sym-handling.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o

View File

@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};

View File

@ -6,9 +6,10 @@
#include "symbol.h" // for the elf__needs_adjust_symbols() prototype
#include <stdbool.h>
#include <gelf.h>
#ifdef HAVE_LIBELF_SUPPORT
#include <gelf.h>
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
{
return ehdr.e_type == ET_EXEC ||

View File

@ -1,2 +1,4 @@
perf-y += perf_regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o

View File

@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};

View File

@ -113,10 +113,10 @@ static int is_tracepoint_available(const char *str, struct evlist *evlist)
struct parse_events_error err;
int ret;
err.str = NULL;
bzero(&err, sizeof(err));
ret = parse_events(evlist, str, &err);
if (err.str)
pr_err("%s : %s\n", str, err.str);
parse_events_print_error(&err, "tracepoint");
return ret;
}

View File

@ -1,2 +1,4 @@
perf-y += perf_regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o

View File

@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};

View File

@ -7,7 +7,7 @@ static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
char *endptr, *tok, *name;
struct map *map = ms->map;
struct addr_map_symbol target = {
.map = map,
.ms = { .map = map, },
};
tok = strchr(ops->raw, ',');
@ -38,9 +38,9 @@ static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
return -1;
target.addr = map__objdump_2mem(map, ops->target.addr);
if (map_groups__find_ams(&target) == 0 &&
map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr)
ops->target.sym = target.sym;
if (map_groups__find_ams(ms->mg, &target) == 0 &&
map__rip_2objdump(target.ms.map, map->map_ip(target.ms.map, target.addr)) == ops->target.addr)
ops->target.sym = target.ms.sym;
return 0;
}

View File

@ -1,5 +1,6 @@
perf-y += header.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o

View File

@ -0,0 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "../../util/perf_regs.h"
const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};

View File

@ -1647,6 +1647,12 @@
"0f ae 30 \txsaveopt (%eax)",},
{{0x0f, 0xae, 0xf0, }, 3, 0, "", "",
"0f ae f0 \tmfence ",},
{{0x0f, 0x1c, 0x00, }, 3, 0, "", "",
"0f 1c 00 \tcldemote (%eax)",},
{{0x0f, 0x1c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
"0f 1c 05 78 56 34 12 \tcldemote 0x12345678",},
{{0x0f, 0x1c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
"0f 1c 84 c8 78 56 34 12 \tcldemote 0x12345678(%eax,%ecx,8)",},
{{0x0f, 0xc7, 0x20, }, 3, 0, "", "",
"0f c7 20 \txsavec (%eax)",},
{{0x0f, 0xc7, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
@ -1677,3 +1683,49 @@
"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",},
{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",},
{{0x66, 0x0f, 0xae, 0xf3, }, 4, 0, "", "",
"66 0f ae f3 \ttpause %ebx",},
{{0x67, 0xf3, 0x0f, 0xae, 0xf0, }, 5, 0, "", "",
"67 f3 0f ae f0 \tumonitor %ax",},
{{0xf3, 0x0f, 0xae, 0xf0, }, 4, 0, "", "",
"f3 0f ae f0 \tumonitor %eax",},
{{0xf2, 0x0f, 0xae, 0xf0, }, 4, 0, "", "",
"f2 0f ae f0 \tumwait %eax",},
{{0x0f, 0x38, 0xf9, 0x03, }, 4, 0, "", "",
"0f 38 f9 03 \tmovdiri %eax,(%ebx)",},
{{0x0f, 0x38, 0xf9, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
"0f 38 f9 88 78 56 34 12 \tmovdiri %ecx,0x12345678(%eax)",},
{{0x66, 0x0f, 0x38, 0xf8, 0x18, }, 5, 0, "", "",
"66 0f 38 f8 18 \tmovdir64b (%eax),%ebx",},
{{0x66, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"66 0f 38 f8 88 78 56 34 12 \tmovdir64b 0x12345678(%eax),%ecx",},
{{0x67, 0x66, 0x0f, 0x38, 0xf8, 0x1c, }, 6, 0, "", "",
"67 66 0f 38 f8 1c \tmovdir64b (%si),%bx",},
{{0x67, 0x66, 0x0f, 0x38, 0xf8, 0x8c, 0x34, 0x12, }, 8, 0, "", "",
"67 66 0f 38 f8 8c 34 12 \tmovdir64b 0x1234(%si),%cx",},
{{0xf2, 0x0f, 0x38, 0xf8, 0x18, }, 5, 0, "", "",
"f2 0f 38 f8 18 \tenqcmd (%eax),%ebx",},
{{0xf2, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"f2 0f 38 f8 88 78 56 34 12 \tenqcmd 0x12345678(%eax),%ecx",},
{{0x67, 0xf2, 0x0f, 0x38, 0xf8, 0x1c, }, 6, 0, "", "",
"67 f2 0f 38 f8 1c \tenqcmd (%si),%bx",},
{{0x67, 0xf2, 0x0f, 0x38, 0xf8, 0x8c, 0x34, 0x12, }, 8, 0, "", "",
"67 f2 0f 38 f8 8c 34 12 \tenqcmd 0x1234(%si),%cx",},
{{0xf3, 0x0f, 0x38, 0xf8, 0x18, }, 5, 0, "", "",
"f3 0f 38 f8 18 \tenqcmds (%eax),%ebx",},
{{0xf3, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"f3 0f 38 f8 88 78 56 34 12 \tenqcmds 0x12345678(%eax),%ecx",},
{{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x1c, }, 6, 0, "", "",
"67 f3 0f 38 f8 1c \tenqcmds (%si),%bx",},
{{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x8c, 0x34, 0x12, }, 8, 0, "", "",
"67 f3 0f 38 f8 8c 34 12 \tenqcmds 0x1234(%si),%cx",},
{{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
"0f 01 cf \tencls ",},
{{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
"0f 01 d7 \tenclu ",},
{{0x0f, 0x01, 0xc0, }, 3, 0, "", "",
"0f 01 c0 \tenclv ",},
{{0x0f, 0x01, 0xc5, }, 3, 0, "", "",
"0f 01 c5 \tpconfig ",},
{{0xf3, 0x0f, 0x09, }, 3, 0, "", "",
"f3 0f 09 \twbnoinvd ",},

View File

@ -1667,6 +1667,16 @@
"41 0f ae 30 \txsaveopt (%r8)",},
{{0x0f, 0xae, 0xf0, }, 3, 0, "", "",
"0f ae f0 \tmfence ",},
{{0x0f, 0x1c, 0x00, }, 3, 0, "", "",
"0f 1c 00 \tcldemote (%rax)",},
{{0x41, 0x0f, 0x1c, 0x00, }, 4, 0, "", "",
"41 0f 1c 00 \tcldemote (%r8)",},
{{0x0f, 0x1c, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
"0f 1c 04 25 78 56 34 12 \tcldemote 0x12345678",},
{{0x0f, 0x1c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
"0f 1c 84 c8 78 56 34 12 \tcldemote 0x12345678(%rax,%rcx,8)",},
{{0x41, 0x0f, 0x1c, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"41 0f 1c 84 c8 78 56 34 12 \tcldemote 0x12345678(%r8,%rcx,8)",},
{{0x0f, 0xc7, 0x20, }, 3, 0, "", "",
"0f c7 20 \txsavec (%rax)",},
{{0x41, 0x0f, 0xc7, 0x20, }, 4, 0, "", "",
@ -1727,3 +1737,55 @@
"f3 48 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%rax,%rcx,8)",},
{{0xf3, 0x49, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
"f3 49 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%r8,%rcx,8)",},
{{0x66, 0x0f, 0xae, 0xf3, }, 4, 0, "", "",
"66 0f ae f3 \ttpause %ebx",},
{{0x66, 0x41, 0x0f, 0xae, 0xf0, }, 5, 0, "", "",
"66 41 0f ae f0 \ttpause %r8d",},
{{0x67, 0xf3, 0x0f, 0xae, 0xf0, }, 5, 0, "", "",
"67 f3 0f ae f0 \tumonitor %eax",},
{{0xf3, 0x0f, 0xae, 0xf0, }, 4, 0, "", "",
"f3 0f ae f0 \tumonitor %rax",},
{{0x67, 0xf3, 0x41, 0x0f, 0xae, 0xf0, }, 6, 0, "", "",
"67 f3 41 0f ae f0 \tumonitor %r8d",},
{{0xf2, 0x0f, 0xae, 0xf0, }, 4, 0, "", "",
"f2 0f ae f0 \tumwait %eax",},
{{0xf2, 0x41, 0x0f, 0xae, 0xf0, }, 5, 0, "", "",
"f2 41 0f ae f0 \tumwait %r8d",},
{{0x48, 0x0f, 0x38, 0xf9, 0x03, }, 5, 0, "", "",
"48 0f 38 f9 03 \tmovdiri %rax,(%rbx)",},
{{0x48, 0x0f, 0x38, 0xf9, 0x88, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"48 0f 38 f9 88 78 56 34 12 \tmovdiri %rcx,0x12345678(%rax)",},
{{0x66, 0x0f, 0x38, 0xf8, 0x18, }, 5, 0, "", "",
"66 0f 38 f8 18 \tmovdir64b (%rax),%rbx",},
{{0x66, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"66 0f 38 f8 88 78 56 34 12 \tmovdir64b 0x12345678(%rax),%rcx",},
{{0x67, 0x66, 0x0f, 0x38, 0xf8, 0x18, }, 6, 0, "", "",
"67 66 0f 38 f8 18 \tmovdir64b (%eax),%ebx",},
{{0x67, 0x66, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
"67 66 0f 38 f8 88 78 56 34 12 \tmovdir64b 0x12345678(%eax),%ecx",},
{{0xf2, 0x0f, 0x38, 0xf8, 0x18, }, 5, 0, "", "",
"f2 0f 38 f8 18 \tenqcmd (%rax),%rbx",},
{{0xf2, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"f2 0f 38 f8 88 78 56 34 12 \tenqcmd 0x12345678(%rax),%rcx",},
{{0x67, 0xf2, 0x0f, 0x38, 0xf8, 0x18, }, 6, 0, "", "",
"67 f2 0f 38 f8 18 \tenqcmd (%eax),%ebx",},
{{0x67, 0xf2, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
"67 f2 0f 38 f8 88 78 56 34 12 \tenqcmd 0x12345678(%eax),%ecx",},
{{0xf3, 0x0f, 0x38, 0xf8, 0x18, }, 5, 0, "", "",
"f3 0f 38 f8 18 \tenqcmds (%rax),%rbx",},
{{0xf3, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"f3 0f 38 f8 88 78 56 34 12 \tenqcmds 0x12345678(%rax),%rcx",},
{{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x18, }, 6, 0, "", "",
"67 f3 0f 38 f8 18 \tenqcmds (%eax),%ebx",},
{{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
"67 f3 0f 38 f8 88 78 56 34 12 \tenqcmds 0x12345678(%eax),%ecx",},
{{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
"0f 01 cf \tencls ",},
{{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
"0f 01 d7 \tenclu ",},
{{0x0f, 0x01, 0xc0, }, 3, 0, "", "",
"0f 01 c0 \tenclv ",},
{{0x0f, 0x01, 0xc5, }, 3, 0, "", "",
"0f 01 c5 \tpconfig ",},
{{0xf3, 0x0f, 0x09, }, 3, 0, "", "",
"f3 0f 09 \twbnoinvd ",},

View File

@ -1320,6 +1320,14 @@ int main(void)
asm volatile("xsaveopt (%r8)");
asm volatile("mfence");
/* cldemote m8 */
asm volatile("cldemote (%rax)");
asm volatile("cldemote (%r8)");
asm volatile("cldemote (0x12345678)");
asm volatile("cldemote 0x12345678(%rax,%rcx,8)");
asm volatile("cldemote 0x12345678(%r8,%rcx,8)");
/* xsavec mem */
asm volatile("xsavec (%rax)");
@ -1364,6 +1372,48 @@ int main(void)
asm volatile("ptwriteq 0x12345678(%rax,%rcx,8)");
asm volatile("ptwriteq 0x12345678(%r8,%rcx,8)");
/* tpause */
asm volatile("tpause %ebx");
asm volatile("tpause %r8d");
/* umonitor */
asm volatile("umonitor %eax");
asm volatile("umonitor %rax");
asm volatile("umonitor %r8d");
/* umwait */
asm volatile("umwait %eax");
asm volatile("umwait %r8d");
/* movdiri */
asm volatile("movdiri %rax,(%rbx)");
asm volatile("movdiri %rcx,0x12345678(%rax)");
/* movdir64b */
asm volatile("movdir64b (%rax),%rbx");
asm volatile("movdir64b 0x12345678(%rax),%rcx");
asm volatile("movdir64b (%eax),%ebx");
asm volatile("movdir64b 0x12345678(%eax),%ecx");
/* enqcmd */
asm volatile("enqcmd (%rax),%rbx");
asm volatile("enqcmd 0x12345678(%rax),%rcx");
asm volatile("enqcmd (%eax),%ebx");
asm volatile("enqcmd 0x12345678(%eax),%ecx");
/* enqcmds */
asm volatile("enqcmds (%rax),%rbx");
asm volatile("enqcmds 0x12345678(%rax),%rcx");
asm volatile("enqcmds (%eax),%ebx");
asm volatile("enqcmds 0x12345678(%eax),%ecx");
#else /* #ifdef __x86_64__ */
/* bound r32, mem (same op code as EVEX prefix) */
@ -2656,6 +2706,12 @@ int main(void)
asm volatile("xsaveopt (%eax)");
asm volatile("mfence");
/* cldemote m8 */
asm volatile("cldemote (%eax)");
asm volatile("cldemote (0x12345678)");
asm volatile("cldemote 0x12345678(%eax,%ecx,8)");
/* xsavec mem */
asm volatile("xsavec (%eax)");
@ -2684,8 +2740,61 @@ int main(void)
asm volatile("ptwritel (0x12345678)");
asm volatile("ptwritel 0x12345678(%eax,%ecx,8)");
/* tpause */
asm volatile("tpause %ebx");
/* umonitor */
asm volatile("umonitor %ax");
asm volatile("umonitor %eax");
/* umwait */
asm volatile("umwait %eax");
/* movdiri */
asm volatile("movdiri %eax,(%ebx)");
asm volatile("movdiri %ecx,0x12345678(%eax)");
/* movdir64b */
asm volatile("movdir64b (%eax),%ebx");
asm volatile("movdir64b 0x12345678(%eax),%ecx");
asm volatile("movdir64b (%si),%bx");
asm volatile("movdir64b 0x1234(%si),%cx");
/* enqcmd */
asm volatile("enqcmd (%eax),%ebx");
asm volatile("enqcmd 0x12345678(%eax),%ecx");
asm volatile("enqcmd (%si),%bx");
asm volatile("enqcmd 0x1234(%si),%cx");
/* enqcmds */
asm volatile("enqcmds (%eax),%ebx");
asm volatile("enqcmds 0x12345678(%eax),%ecx");
asm volatile("enqcmds (%si),%bx");
asm volatile("enqcmds 0x1234(%si),%cx");
#endif /* #ifndef __x86_64__ */
/* SGX */
asm volatile("encls");
asm volatile("enclu");
asm volatile("enclv");
/* pconfig */
asm volatile("pconfig");
/* wbnoinvd */
asm volatile("wbnoinvd");
/* Following line is a marker for the awk script - do not change */
asm volatile("rdtsc"); /* Stop here */

View File

@ -9,6 +9,7 @@
#include <sys/prctl.h>
#include <perf/cpumap.h>
#include <perf/evlist.h>
#include <perf/mmap.h>
#include "debug.h"
#include "parse-events.h"
@ -117,10 +118,10 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
for (i = 0; i < evlist->core.nr_mmaps; i++) {
md = &evlist->mmap[i];
if (perf_mmap__read_init(md) < 0)
if (perf_mmap__read_init(&md->core) < 0)
continue;
while ((event = perf_mmap__read_event(md)) != NULL) {
while ((event = perf_mmap__read_event(&md->core)) != NULL) {
struct perf_sample sample;
if (event->header.type != PERF_RECORD_COMM ||
@ -139,9 +140,9 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
comm2_time = sample.time;
}
next_event:
perf_mmap__consume(md);
perf_mmap__consume(&md->core);
}
perf_mmap__read_done(md);
perf_mmap__read_done(&md->core);
}
if (!comm1_time || !comm2_time)

View File

@ -26,7 +26,11 @@ struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist,
bool found_bts = false;
intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
if (intel_pt_pmu)
intel_pt_pmu->auxtrace = true;
intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
if (intel_bts_pmu)
intel_bts_pmu->auxtrace = true;
evlist__for_each_entry(evlist, evsel) {
if (intel_pt_pmu && evsel->core.attr.type == intel_pt_pmu->type)

View File

@ -29,7 +29,7 @@ int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
return -1;
}
for (pos = maps__first(maps); pos; pos = map__next(pos)) {
maps__for_each_entry(maps, pos) {
struct kmap *kmap;
size_t size;

View File

@ -113,6 +113,11 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
const struct perf_cpu_map *cpus = evlist->core.cpus;
bool privileged = perf_event_paranoid_check(-1);
if (opts->auxtrace_sample_mode) {
pr_err("Intel BTS does not support AUX area sampling\n");
return -EINVAL;
}
btsr->evlist = evlist;
btsr->snapshot_mode = opts->auxtrace_snapshot_mode;

View File

@ -17,6 +17,7 @@
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/evsel_config.h"
#include "../../util/cpumap.h"
#include "../../util/mmap.h"
#include <subcmd/parse-options.h>
@ -551,6 +552,43 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
evsel->core.attr.config);
}
static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu,
struct evsel *evsel)
{
struct perf_evsel_config_term *term;
u64 user_bits = 0, bits;
term = perf_evsel__get_config_term(evsel, CFG_CHG);
if (term)
user_bits = term->val.cfg_chg;
bits = perf_pmu__format_bits(&intel_pt_pmu->format, "psb_period");
/* Did user change psb_period */
if (bits & user_bits)
return;
/* Set psb_period to 0 */
evsel->core.attr.config &= ~bits;
}
static void intel_pt_min_max_sample_sz(struct evlist *evlist,
size_t *min_sz, size_t *max_sz)
{
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
size_t sz = evsel->core.attr.aux_sample_size;
if (!sz)
continue;
if (min_sz && (sz < *min_sz || !*min_sz))
*min_sz = sz;
if (max_sz && sz > *max_sz)
*max_sz = sz;
}
}
/*
* Currently, there is not enough information to disambiguate different PEBS
* events, so only allow one.
@ -606,6 +644,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
return -EINVAL;
}
if (opts->auxtrace_snapshot_mode && opts->auxtrace_sample_mode) {
pr_err("Snapshot mode (" INTEL_PT_PMU_NAME " PMU) and sample trace cannot be used together\n");
return -EINVAL;
}
if (opts->use_clockid) {
pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
return -EINVAL;
@ -617,6 +660,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;
if (opts->auxtrace_sample_mode)
intel_pt_config_sample_mode(intel_pt_pmu, intel_pt_evsel);
err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
if (err)
return err;
@ -666,6 +712,34 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
opts->auxtrace_snapshot_size, psb_period);
}
/* Set default sizes for sample mode */
if (opts->auxtrace_sample_mode) {
size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
size_t min_sz = 0, max_sz = 0;
intel_pt_min_max_sample_sz(evlist, &min_sz, &max_sz);
if (!opts->auxtrace_mmap_pages && !privileged &&
opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
if (!opts->auxtrace_mmap_pages) {
size_t sz = round_up(max_sz, page_size) / page_size;
opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
}
if (max_sz > opts->auxtrace_mmap_pages * (size_t)page_size) {
pr_err("Sample size %zu must not be greater than AUX area tracing mmap size %zu\n",
max_sz,
opts->auxtrace_mmap_pages * (size_t)page_size);
return -EINVAL;
}
pr_debug2("Intel PT min. sample size: %zu max. sample size: %zu\n",
min_sz, max_sz);
if (psb_period &&
min_sz <= psb_period + INTEL_PT_PSB_PERIOD_NEAR)
ui__warning("Intel PT sample size (%zu) may be too small for PSB period (%zu)\n",
min_sz, psb_period);
}
/* Set default sizes for full trace mode */
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
if (privileged) {
@ -682,7 +756,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
size_t min_sz;
if (opts->auxtrace_snapshot_mode)
if (opts->auxtrace_snapshot_mode || opts->auxtrace_sample_mode)
min_sz = KiB(4);
else
min_sz = KiB(8);
@ -1136,5 +1210,10 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
ptr->itr.reference = intel_pt_reference;
ptr->itr.read_finish = intel_pt_read_finish;
/*
* Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K
* should give at least 1 PSB per sample.
*/
ptr->itr.default_aux_sample_size = 4096;
return &ptr->itr;
}

View File

@ -83,7 +83,7 @@ static void process_basic_block(struct addr_map_symbol *start,
struct addr_map_symbol *end,
struct branch_flags *flags)
{
struct symbol *sym = start->sym;
struct symbol *sym = start->ms.sym;
struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
struct block_range_iter iter;
struct block_range *entry;
@ -201,7 +201,7 @@ static int process_branch_callback(struct evsel *evsel,
if (a.map != NULL)
a.map->dso->hit = 1;
hist__account_cycles(sample->branch_stack, al, sample, false);
hist__account_cycles(sample->branch_stack, al, sample, false, NULL);
ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
return ret;
@ -301,9 +301,9 @@ static int hist_entry__tty_annotate(struct hist_entry *he,
struct perf_annotate *ann)
{
if (!ann->use_stdio2)
return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, &ann->opts);
return symbol__tty_annotate(&he->ms, evsel, &ann->opts);
return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, &ann->opts);
return symbol__tty_annotate2(&he->ms, evsel, &ann->opts);
}
static void hists__find_annotations(struct hists *hists,

View File

@ -23,6 +23,8 @@
#include "util/time-utils.h"
#include "util/annotate.h"
#include "util/map.h"
#include "util/spark.h"
#include "util/block-info.h"
#include <linux/err.h>
#include <linux/zalloc.h>
#include <subcmd/pager.h>
@ -53,6 +55,7 @@ enum {
PERF_HPP_DIFF__FORMULA,
PERF_HPP_DIFF__DELTA_ABS,
PERF_HPP_DIFF__CYCLES,
PERF_HPP_DIFF__CYCLES_HIST,
PERF_HPP_DIFF__MAX_INDEX
};
@ -87,6 +90,7 @@ static bool force;
static bool show_period;
static bool show_formula;
static bool show_baseline_only;
static bool cycles_hist;
static unsigned int sort_compute = 1;
static s64 compute_wdiff_w1;
@ -95,8 +99,6 @@ static s64 compute_wdiff_w2;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
static struct addr_location dummy_al;
enum {
COMPUTE_DELTA,
COMPUTE_RATIO,
@ -164,6 +166,10 @@ static struct header_column {
[PERF_HPP_DIFF__CYCLES] = {
.name = "[Program Block Range] Cycles Diff",
.width = 70,
},
[PERF_HPP_DIFF__CYCLES_HIST] = {
.name = "stddev/Hist",
.width = NUM_SPARKS + 9,
}
};
@ -420,7 +426,8 @@ static int diff__process_sample_event(struct perf_tool *tool,
goto out_put;
}
hist__account_cycles(sample->branch_stack, &al, sample, false);
hist__account_cycles(sample->branch_stack, &al, sample, false,
NULL);
}
/*
@ -530,41 +537,6 @@ static void hists__baseline_only(struct hists *hists)
}
}
static int64_t block_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right)
{
struct block_info *bi_l = left->block_info;
struct block_info *bi_r = right->block_info;
int cmp;
if (!bi_l->sym || !bi_r->sym) {
if (!bi_l->sym && !bi_r->sym)
return 0;
else if (!bi_l->sym)
return -1;
else
return 1;
}
if (bi_l->sym == bi_r->sym) {
if (bi_l->start == bi_r->start) {
if (bi_l->end == bi_r->end)
return 0;
else
return (int64_t)(bi_r->end - bi_l->end);
} else
return (int64_t)(bi_r->start - bi_l->start);
} else {
cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
return cmp;
}
if (bi_l->sym->start != bi_r->sym->start)
return (int64_t)(bi_r->sym->start - bi_l->sym->start);
return (int64_t)(bi_r->sym->end - bi_l->sym->end);
}
static int64_t block_cycles_diff_cmp(struct hist_entry *left,
struct hist_entry *right)
{
@ -593,64 +565,13 @@ static void init_block_hist(struct block_hist *bh)
INIT_LIST_HEAD(&bh->block_fmt.list);
INIT_LIST_HEAD(&bh->block_fmt.sort_list);
bh->block_fmt.cmp = block_cmp;
bh->block_fmt.cmp = block_info__cmp;
bh->block_fmt.sort = block_sort;
perf_hpp_list__register_sort_field(&bh->block_list,
&bh->block_fmt);
bh->valid = true;
}
static void init_block_info(struct block_info *bi, struct symbol *sym,
struct cyc_hist *ch, int offset)
{
bi->sym = sym;
bi->start = ch->start;
bi->end = offset;
bi->cycles = ch->cycles;
bi->cycles_aggr = ch->cycles_aggr;
bi->num = ch->num;
bi->num_aggr = ch->num_aggr;
}
static int process_block_per_sym(struct hist_entry *he)
{
struct annotation *notes;
struct cyc_hist *ch;
struct block_hist *bh;
if (!he->ms.map || !he->ms.sym)
return 0;
notes = symbol__annotation(he->ms.sym);
if (!notes || !notes->src || !notes->src->cycles_hist)
return 0;
bh = container_of(he, struct block_hist, he);
init_block_hist(bh);
ch = notes->src->cycles_hist;
for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) {
if (ch[i].num_aggr) {
struct block_info *bi;
struct hist_entry *he_block;
bi = block_info__new();
if (!bi)
return -1;
init_block_info(bi, he->ms.sym, &ch[i], i);
he_block = hists__add_entry_block(&bh->block_hists,
&dummy_al, bi);
if (!he_block) {
block_info__put(bi);
return -1;
}
}
}
return 0;
}
static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b)
{
struct block_info *bi_a = a->block_info;
@ -689,6 +610,21 @@ static struct hist_entry *get_block_pair(struct hist_entry *he,
return NULL;
}
static void init_spark_values(unsigned long *svals, int num)
{
for (int i = 0; i < num; i++)
svals[i] = 0;
}
static void update_spark_value(unsigned long *svals, int num,
struct stats *stats, u64 val)
{
int n = stats->n;
if (n < num)
svals[n] = val;
}
static void compute_cycles_diff(struct hist_entry *he,
struct hist_entry *pair)
{
@ -697,6 +633,26 @@ static void compute_cycles_diff(struct hist_entry *he,
pair->diff.cycles =
pair->block_info->cycles_aggr / pair->block_info->num_aggr -
he->block_info->cycles_aggr / he->block_info->num_aggr;
if (!cycles_hist)
return;
init_stats(&pair->diff.stats);
init_spark_values(pair->diff.svals, NUM_SPARKS);
for (int i = 0; i < pair->block_info->num; i++) {
u64 val;
if (i >= he->block_info->num || i >= NUM_SPARKS)
break;
val = labs(pair->block_info->cycles_spark[i] -
he->block_info->cycles_spark[i]);
update_spark_value(pair->diff.svals, NUM_SPARKS,
&pair->diff.stats, val);
update_stats(&pair->diff.stats, val);
}
}
}
@ -720,13 +676,6 @@ static void block_hists_match(struct hists *hists_base,
}
}
static int filter_cb(struct hist_entry *he, void *arg __maybe_unused)
{
/* Skip the calculation of column length in output_resort */
he->filtered = true;
return 0;
}
static void hists__precompute(struct hists *hists)
{
struct rb_root_cached *root;
@ -747,8 +696,11 @@ static void hists__precompute(struct hists *hists)
he = rb_entry(next, struct hist_entry, rb_node_in);
next = rb_next(&he->rb_node_in);
if (compute == COMPUTE_CYCLES)
process_block_per_sym(he);
if (compute == COMPUTE_CYCLES) {
bh = container_of(he, struct block_hist, he);
init_block_hist(bh);
block_info__process_sym(he, bh, NULL, 0);
}
data__for_each_file_new(i, d) {
pair = get_pair_data(he, d);
@ -767,16 +719,18 @@ static void hists__precompute(struct hists *hists)
compute_wdiff(he, pair);
break;
case COMPUTE_CYCLES:
process_block_per_sym(pair);
bh = container_of(he, struct block_hist, he);
pair_bh = container_of(pair, struct block_hist,
he);
init_block_hist(pair_bh);
block_info__process_sym(pair, pair_bh, NULL, 0);
bh = container_of(he, struct block_hist, he);
if (bh->valid && pair_bh->valid) {
block_hists_match(&bh->block_hists,
&pair_bh->block_hists);
hists__output_resort_cb(&pair_bh->block_hists,
NULL, filter_cb);
hists__output_resort(&pair_bh->block_hists,
NULL);
}
break;
default:
@ -1255,6 +1209,9 @@ static const struct option options[] = {
"Show period values."),
OPT_BOOLEAN('F', "formula", &show_formula,
"Show formula."),
OPT_BOOLEAN(0, "cycles-hist", &cycles_hist,
"Show cycles histogram and standard deviation "
"- WARNING: use only with -c cycles."),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@ -1462,6 +1419,90 @@ static int hpp__color_cycles(struct perf_hpp_fmt *fmt,
return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES);
}
static int all_zero(unsigned long *vals, int len)
{
int i;
for (i = 0; i < len; i++)
if (vals[i] != 0)
return 0;
return 1;
}
static int print_cycles_spark(char *bf, int size, unsigned long *svals, u64 n)
{
int printed;
if (n <= 1)
return 0;
if (n > NUM_SPARKS)
n = NUM_SPARKS;
if (all_zero(svals, n))
return 0;
printed = print_spark(bf, size, svals, n);
printed += scnprintf(bf + printed, size - printed, " ");
return printed;
}
static int hpp__color_cycles_hist(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp, struct hist_entry *he)
{
struct diff_hpp_fmt *dfmt =
container_of(fmt, struct diff_hpp_fmt, fmt);
struct hist_entry *pair = get_pair_fmt(he, dfmt);
struct block_hist *bh = container_of(he, struct block_hist, he);
struct block_hist *bh_pair;
struct hist_entry *block_he;
char spark[32], buf[128];
double r;
int ret, pad;
if (!pair) {
if (bh->block_idx)
hpp->skip = true;
goto no_print;
}
bh_pair = container_of(pair, struct block_hist, he);
block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx);
if (!block_he) {
hpp->skip = true;
goto no_print;
}
ret = print_cycles_spark(spark, sizeof(spark), block_he->diff.svals,
block_he->diff.stats.n);
r = rel_stddev_stats(stddev_stats(&block_he->diff.stats),
avg_stats(&block_he->diff.stats));
if (ret) {
/*
* Padding spaces if number of sparks less than NUM_SPARKS
* otherwise the output is not aligned.
*/
pad = NUM_SPARKS - ((ret - 1) / 3);
scnprintf(buf, sizeof(buf), "%s%5.1f%% %s", "\u00B1", r, spark);
ret = scnprintf(hpp->buf, hpp->size, "%*s",
dfmt->header_width, buf);
if (pad) {
ret += scnprintf(hpp->buf + ret, hpp->size - ret,
"%-*s", pad, " ");
}
return ret;
}
no_print:
return scnprintf(hpp->buf, hpp->size, "%*s",
dfmt->header_width, " ");
}
static void
hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size)
{
@ -1667,6 +1708,10 @@ static void data__hpp_register(struct data__file *d, int idx)
fmt->color = hpp__color_cycles;
fmt->sort = hist_entry__cmp_nop;
break;
case PERF_HPP_DIFF__CYCLES_HIST:
fmt->color = hpp__color_cycles_hist;
fmt->sort = hist_entry__cmp_nop;
break;
default:
fmt->sort = hist_entry__cmp_nop;
break;
@ -1692,10 +1737,14 @@ static int ui_init(void)
* PERF_HPP_DIFF__DELTA
* PERF_HPP_DIFF__RATIO
* PERF_HPP_DIFF__WEIGHTED_DIFF
* PERF_HPP_DIFF__CYCLES
*/
data__hpp_register(d, i ? compute_2_hpp[compute] :
PERF_HPP_DIFF__BASELINE);
if (cycles_hist && i)
data__hpp_register(d, PERF_HPP_DIFF__CYCLES_HIST);
/*
* And the rest:
*
@ -1850,6 +1899,9 @@ int cmd_diff(int argc, const char **argv)
if (quiet)
perf_quiet_option();
if (cycles_hist && (compute != COMPUTE_CYCLES))
usage_with_options(diff_usage, options);
symbol__annotation_init();
if (symbol__init(NULL) < 0)

View File

@ -45,6 +45,7 @@ struct perf_inject {
u64 aux_id;
struct list_head samples;
struct itrace_synth_opts itrace_synth_opts;
char event_copy[PERF_SAMPLE_MAX_SIZE];
};
struct event_entry {
@ -214,6 +215,28 @@ static int perf_event__drop_aux(struct perf_tool *tool,
return 0;
}
static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
union perf_event *event,
struct perf_sample *sample)
{
size_t sz1 = sample->aux_sample.data - (void *)event;
size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
union perf_event *ev = (union perf_event *)inject->event_copy;
if (sz1 > event->header.size || sz2 > event->header.size ||
sz1 + sz2 > event->header.size ||
sz1 < sizeof(struct perf_event_header) + sizeof(u64))
return event;
memcpy(ev, event, sz1);
memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
ev->header.size = sz1 + sz2;
((u64 *)((void *)ev + sz1))[-1] = 0;
return ev;
}
typedef int (*inject_handler)(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@ -226,6 +249,9 @@ static int perf_event__repipe_sample(struct perf_tool *tool,
struct evsel *evsel,
struct machine *machine)
{
struct perf_inject *inject = container_of(tool, struct perf_inject,
tool);
if (evsel && evsel->handler) {
inject_handler f = evsel->handler;
return f(tool, event, sample, evsel, machine);
@ -233,6 +259,9 @@ static int perf_event__repipe_sample(struct perf_tool *tool,
build_id__mark_dso_hit(tool, event, sample, evsel, machine);
if (inject->itrace_synth_opts.set && sample->aux_sample.size)
event = perf_inject__cut_auxtrace_sample(inject, event, sample);
return perf_event__repipe_synth(tool, event);
}
@ -578,58 +607,6 @@ static void strip_init(struct perf_inject *inject)
evsel->handler = drop_sample;
}
static bool has_tracking(struct evsel *evsel)
{
return evsel->core.attr.mmap || evsel->core.attr.mmap2 || evsel->core.attr.comm ||
evsel->core.attr.task;
}
#define COMPAT_MASK (PERF_SAMPLE_ID | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER)
/*
* In order that the perf.data file is parsable, tracking events like MMAP need
* their selected event to exist, except if there is only 1 selected event left
* and it has a compatible sample type.
*/
static bool ok_to_remove(struct evlist *evlist,
struct evsel *evsel_to_remove)
{
struct evsel *evsel;
int cnt = 0;
bool ok = false;
if (!has_tracking(evsel_to_remove))
return true;
evlist__for_each_entry(evlist, evsel) {
if (evsel->handler != drop_sample) {
cnt += 1;
if ((evsel->core.attr.sample_type & COMPAT_MASK) ==
(evsel_to_remove->core.attr.sample_type & COMPAT_MASK))
ok = true;
}
}
return ok && cnt == 1;
}
static void strip_fini(struct perf_inject *inject)
{
struct evlist *evlist = inject->session->evlist;
struct evsel *evsel, *tmp;
/* Remove non-synthesized evsels if possible */
evlist__for_each_entry_safe(evlist, tmp, evsel) {
if (evsel->handler == drop_sample &&
ok_to_remove(evlist, evsel)) {
pr_debug("Deleting %s\n", perf_evsel__name(evsel));
evlist__remove(evlist, evsel);
evsel__delete(evsel);
}
}
}
static int __cmd_inject(struct perf_inject *inject)
{
int ret = -EINVAL;
@ -729,8 +706,6 @@ static int __cmd_inject(struct perf_inject *inject)
evlist__remove(session->evlist, evsel);
evsel__delete(evsel);
}
if (inject->strip)
strip_fini(inject);
}
session->header.data_offset = output_data_offset;
session->header.data_size = inject->bytes_written;

View File

@ -412,8 +412,8 @@ static u64 find_callsite(struct evsel *evsel, struct perf_sample *sample)
sizeof(key), callcmp);
if (!caller) {
/* found */
if (node->map)
addr = map__unmap_ip(node->map, node->ip);
if (node->ms.map)
addr = map__unmap_ip(node->ms.map, node->ip);
else
addr = node->ip;

View File

@ -46,6 +46,7 @@
#include <semaphore.h>
#include <signal.h>
#include <math.h>
#include <perf/mmap.h>
static const char *get_filename_for_perf_kvm(void)
{
@ -759,14 +760,14 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
*mmap_time = ULLONG_MAX;
md = &evlist->mmap[idx];
err = perf_mmap__read_init(md);
err = perf_mmap__read_init(&md->core);
if (err < 0)
return (err == -EAGAIN) ? 0 : -1;
while ((event = perf_mmap__read_event(md)) != NULL) {
while ((event = perf_mmap__read_event(&md->core)) != NULL) {
err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
if (err) {
perf_mmap__consume(md);
perf_mmap__consume(&md->core);
pr_err("Failed to parse sample\n");
return -1;
}
@ -776,7 +777,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
* FIXME: Here we can't consume the event, as perf_session__queue_event will
* point to it, and it'll get possibly overwritten by the kernel.
*/
perf_mmap__consume(md);
perf_mmap__consume(&md->core);
if (err) {
pr_err("Failed to enqueue sample: %d\n", err);
@ -793,7 +794,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
break;
}
perf_mmap__read_done(md);
perf_mmap__read_done(&md->core);
return n;
}
@ -997,7 +998,7 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
done = perf_kvm__handle_stdin();
if (!rc && !done)
err = fdarray__poll(fda, 100);
err = evlist__poll(kvm->evlist, 100);
}
evlist__disable(kvm->evlist);

View File

@ -26,6 +26,7 @@ int cmd_list(int argc, const char **argv)
int i;
bool raw_dump = false;
bool long_desc_flag = false;
bool deprecated = false;
struct option list_options[] = {
OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
OPT_BOOLEAN('d', "desc", &desc_flag,
@ -34,6 +35,8 @@ int cmd_list(int argc, const char **argv)
"Print longer event descriptions."),
OPT_BOOLEAN(0, "details", &details_flag,
"Print information on the perf event names and expressions used internally by events."),
OPT_BOOLEAN(0, "deprecated", &deprecated,
"Print deprecated events."),
OPT_INCR(0, "debug", &verbose,
"Enable debugging output"),
OPT_END()
@ -55,7 +58,7 @@ int cmd_list(int argc, const char **argv)
if (argc == 0) {
print_events(NULL, raw_dump, !desc_flag, long_desc_flag,
details_flag);
details_flag, deprecated);
return 0;
}
@ -78,7 +81,8 @@ int cmd_list(int argc, const char **argv)
print_hwcache_events(NULL, raw_dump);
else if (strcmp(argv[i], "pmu") == 0)
print_pmu_events(NULL, raw_dump, !desc_flag,
long_desc_flag, details_flag);
long_desc_flag, details_flag,
deprecated);
else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(NULL, NULL, raw_dump);
else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0)
@ -91,7 +95,8 @@ int cmd_list(int argc, const char **argv)
if (sep == NULL) {
print_events(argv[i], raw_dump, !desc_flag,
long_desc_flag,
details_flag);
details_flag,
deprecated);
continue;
}
sep_idx = sep - argv[i];
@ -117,7 +122,8 @@ int cmd_list(int argc, const char **argv)
print_hwcache_events(s, raw_dump);
print_pmu_events(s, raw_dump, !desc_flag,
long_desc_flag,
details_flag);
details_flag,
deprecated);
print_tracepoint_events(NULL, s, raw_dump);
print_sdt_events(NULL, s, raw_dump);
metricgroup__print(true, true, s, raw_dump, details_flag);

View File

@ -55,6 +55,9 @@
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
@ -91,8 +94,11 @@ struct record {
struct switch_output switch_output;
unsigned long long samples;
cpu_set_t affinity_mask;
unsigned long output_max_size; /* = 0: unlimited */
};
static volatile int done;
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
@ -120,6 +126,12 @@ static bool switch_output_time(struct record *rec)
trigger_is_ready(&switch_output_trigger);
}
static bool record__output_max_size_exceeded(struct record *rec)
{
return rec->output_max_size &&
(rec->bytes_written >= rec->output_max_size);
}
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
void *bf, size_t size)
{
@ -132,6 +144,13 @@ static int record__write(struct record *rec, struct mmap *map __maybe_unused,
rec->bytes_written += size;
if (record__output_max_size_exceeded(rec) && !done) {
fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
" stopping session ]\n",
rec->bytes_written >> 10);
done = 1;
}
if (switch_output_size(rec))
trigger_hit(&switch_output_trigger);
@ -197,7 +216,7 @@ static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
* every aio write request started in record__aio_push() so
* decrement it because the request is now complete.
*/
perf_mmap__put(md);
perf_mmap__put(&md->core);
rc = 1;
} else {
/*
@ -276,7 +295,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size
if (record__comp_enabled(aio->rec)) {
size = zstd_compress(aio->rec->session, aio->data + aio->size,
perf_mmap__mmap_len(map) - aio->size,
mmap__mmap_len(map) - aio->size,
buf, size);
} else {
memcpy(aio->data + aio->size, buf, size);
@ -293,7 +312,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size
* after started aio request completion or at record__aio_push()
* if the request failed to start.
*/
perf_mmap__get(map);
perf_mmap__get(&map->core);
}
aio->size += size;
@ -332,7 +351,7 @@ static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
* map->refcount is decremented in record__aio_complete() after
* aio write operation finishes successfully.
*/
perf_mmap__put(map);
perf_mmap__put(&map->core);
}
return ret;
@ -488,7 +507,7 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
struct record *rec = to;
if (record__comp_enabled(rec)) {
size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size);
size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
bf = map->data;
}
@ -496,7 +515,6 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
return record__write(rec, map, bf, size);
}
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;
@ -537,7 +555,7 @@ static int record__process_auxtrace(struct perf_tool *tool,
size_t padding;
u8 pad[8] = {0};
if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
off_t file_offset;
int fd = perf_data__fd(data);
int err;
@ -662,6 +680,11 @@ static int record__auxtrace_init(struct record *rec)
if (err)
return err;
err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
rec->opts.auxtrace_sample_opts);
if (err)
return err;
return auxtrace_parse_filters(rec->evlist);
}
@ -699,10 +722,43 @@ static int record__auxtrace_init(struct record *rec __maybe_unused)
#endif
static bool record__kcore_readable(struct machine *machine)
{
char kcore[PATH_MAX];
int fd;
scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
fd = open(kcore, O_RDONLY);
if (fd < 0)
return false;
close(fd);
return true;
}
static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
char from_dir[PATH_MAX];
char kcore_dir[PATH_MAX];
int ret;
snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
if (ret)
return ret;
return kcore_copy(from_dir, kcore_dir);
}
static int record__mmap_evlist(struct record *rec,
struct evlist *evlist)
{
struct record_opts *opts = &rec->opts;
bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
opts->auxtrace_sample_mode;
char msg[512];
if (opts->affinity != PERF_AFFINITY_SYS)
@ -710,7 +766,7 @@ static int record__mmap_evlist(struct record *rec,
if (evlist__mmap_ex(evlist, opts->mmap_pages,
opts->auxtrace_mmap_pages,
opts->auxtrace_snapshot_mode,
auxtrace_overwrite,
opts->nr_cblocks, opts->affinity,
opts->mmap_flush, opts->comp_level) < 0) {
if (errno == EPERM) {
@ -997,6 +1053,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
}
if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
!rec->opts.auxtrace_sample_mode &&
record__auxtrace_mmap_read(rec, map) != 0) {
rc = -1;
goto out;
@ -1272,6 +1329,15 @@ static int record__synthesize(struct record *rec, bool tail)
if (err)
goto out;
/* Synthesize id_index before auxtrace_info */
if (rec->opts.auxtrace_sample_mode) {
err = perf_event__synthesize_id_index(tool,
process_synthesized_event,
session->evlist, machine);
if (err)
goto out;
}
if (rec->opts.full_auxtrace) {
err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
session, process_synthesized_event);
@ -1383,6 +1449,12 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
session->header.env.comp_type = PERF_COMP_ZSTD;
session->header.env.comp_level = rec->opts.comp_level;
if (rec->opts.kcore &&
!record__kcore_readable(&session->machines.host)) {
pr_err("ERROR: kcore is not readable.\n");
return -1;
}
record__init_features(rec);
if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
@ -1414,6 +1486,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
}
session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
if (rec->opts.kcore) {
err = record__kcore_copy(&session->machines.host, data);
if (err) {
pr_err("ERROR: Failed to copy kcore\n");
goto out_child;
}
}
err = bpf__apply_obj_config();
if (err) {
char errbuf[BUFSIZ];
@ -1936,6 +2016,33 @@ static int record__parse_affinity(const struct option *opt, const char *str, int
return 0;
}
static int parse_output_max_size(const struct option *opt,
const char *str, int unset)
{
unsigned long *s = (unsigned long *)opt->value;
static struct parse_tag tags_size[] = {
{ .tag = 'B', .mult = 1 },
{ .tag = 'K', .mult = 1 << 10 },
{ .tag = 'M', .mult = 1 << 20 },
{ .tag = 'G', .mult = 1 << 30 },
{ .tag = 0 },
};
unsigned long val;
if (unset) {
*s = 0;
return 0;
}
val = parse_tag_value(str, tags_size);
if (val != (unsigned long) -1) {
*s = val;
return 0;
}
return -1;
}
static int record__parse_mmap_pages(const struct option *opt,
const char *str,
int unset __maybe_unused)
@ -2058,6 +2165,31 @@ static const char * const __record_usage[] = {
};
const char * const *record_usage = __record_usage;
static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct machine *machine)
{
/*
* We already have the kernel maps, put in place via perf_session__create_kernel_maps()
* no need to add them twice.
*/
if (!(event->header.misc & PERF_RECORD_MISC_USER))
return 0;
return perf_event__process_mmap(tool, event, sample, machine);
}
static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct machine *machine)
{
/*
* We already have the kernel maps, put in place via perf_session__create_kernel_maps()
* no need to add them twice.
*/
if (!(event->header.misc & PERF_RECORD_MISC_USER))
return 0;
return perf_event__process_mmap2(tool, event, sample, machine);
}
/*
* XXX Ideally would be local to cmd_record() and passed to a record__new
* because we need to have access to it in record__exit, that is called
@ -2087,8 +2219,8 @@ static struct record record = {
.exit = perf_event__process_exit,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.mmap = build_id__process_mmap,
.mmap2 = build_id__process_mmap2,
.ordered_events = true,
},
};
@ -2184,6 +2316,7 @@ static struct option __record_options[] = {
parse_cgroups),
OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
"ms to wait before starting measurement after program start"),
OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
"user to profile"),
@ -2213,6 +2346,8 @@ static struct option __record_options[] = {
parse_clockid),
OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
"opts", "AUX area tracing Snapshot Mode", ""),
OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
"opts", "sample AUX area", ""),
OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
@ -2262,6 +2397,8 @@ static struct option __record_options[] = {
"n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
record__parse_comp_level),
#endif
OPT_CALLBACK(0, "max-size", &record.output_max_size,
"size", "Limit the maximum size of the output file", parse_output_max_size),
OPT_END()
};
@ -2322,6 +2459,9 @@ int cmd_record(int argc, const char **argv)
}
if (rec->opts.kcore)
rec->data.is_dir = true;
if (rec->opts.comp_level != 0) {
pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
rec->no_buildid = true;

View File

@ -51,6 +51,7 @@
#include "util/util.h" // perf_tip()
#include "ui/ui.h"
#include "ui/progress.h"
#include "util/block-info.h"
#include <dlfcn.h>
#include <errno.h>
@ -96,10 +97,13 @@ struct report {
float min_percent;
u64 nr_entries;
u64 queue_size;
u64 total_cycles;
int socket_filter;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct branch_type_stat brtype_stat;
bool symbol_ipc;
bool total_cycles_mode;
struct block_report *block_reports;
};
static int report__config(const char *var, const char *value, void *cb)
@ -290,9 +294,10 @@ static int process_sample_event(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;
if (ui__has_annotation() || rep->symbol_ipc) {
if (ui__has_annotation() || rep->symbol_ipc || rep->total_cycles_mode) {
hist__account_cycles(sample->branch_stack, &al, sample,
rep->nonany_branch_mode);
rep->nonany_branch_mode,
&rep->total_cycles);
}
ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
@ -399,6 +404,13 @@ static int report__setup_sample_type(struct report *rep)
PERF_SAMPLE_BRANCH_ANY))
rep->nonany_branch_mode = true;
#ifndef HAVE_LIBUNWIND_SUPPORT
if (dwarf_callchain_users) {
ui__warning("Please install libunwind development packages "
"during the perf build.\n");
}
#endif
return 0;
}
@ -473,11 +485,30 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
return ret + fprintf(fp, "\n#\n");
}
static int perf_evlist__tui_block_hists_browse(struct evlist *evlist,
struct report *rep)
{
struct evsel *pos;
int i = 0, ret;
evlist__for_each_entry(evlist, pos) {
ret = report__browse_block_hists(&rep->block_reports[i++].hist,
rep->min_percent, pos,
&rep->session->header.env,
&rep->annotation_opts);
if (ret != 0)
return ret;
}
return 0;
}
static int perf_evlist__tty_browse_hists(struct evlist *evlist,
struct report *rep,
const char *help)
{
struct evsel *pos;
int i = 0;
if (!quiet) {
fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n",
@ -493,6 +524,14 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist,
continue;
hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
if (rep->total_cycles_mode) {
report__browse_block_hists(&rep->block_reports[i++].hist,
rep->min_percent, pos,
NULL, NULL);
continue;
}
hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
!(symbol_conf.use_callchain ||
symbol_conf.show_branchflag_count));
@ -575,6 +614,11 @@ static int report__browse_hists(struct report *rep)
switch (use_browser) {
case 1:
if (rep->total_cycles_mode) {
ret = perf_evlist__tui_block_hists_browse(evlist, rep);
break;
}
ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
rep->min_percent,
&session->header.env,
@ -639,7 +683,7 @@ static int hists__resort_cb(struct hist_entry *he, void *arg)
if (rep->symbol_ipc && sym && !sym->annotate2) {
struct evsel *evsel = hists_to_evsel(he->hists);
symbol__annotate2(sym, he->ms.map, evsel,
symbol__annotate2(&he->ms, evsel,
&annotation__default_options, NULL);
}
@ -720,11 +764,9 @@ static struct task *tasks_list(struct task *task, struct machine *machine)
static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
{
size_t printed = 0;
struct rb_node *nd;
for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
struct map *map = rb_entry(nd, struct map, rb_node);
struct map *map;
maps__for_each_entry(maps, map) {
printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n",
indent, "", map->start, map->end,
map->prot & PROT_READ ? 'r' : '-',
@ -732,7 +774,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
map->prot & PROT_EXEC ? 'x' : '-',
map->flags & MAP_SHARED ? 's' : 'p',
map->pgoff,
map->ino, map->dso->name);
map->dso->id.ino, map->dso->name);
}
return printed;
@ -920,6 +962,13 @@ static int __cmd_report(struct report *rep)
report__output_resort(rep);
if (rep->total_cycles_mode) {
rep->block_reports = block_info__create_report(session->evlist,
rep->total_cycles);
if (!rep->block_reports)
return -1;
}
return report__browse_hists(rep);
}
@ -1204,6 +1253,8 @@ int cmd_report(int argc, const char **argv)
"Set time quantum for time sort key (default 100ms)",
parse_time_quantum),
OPTS_EVSWITCH(&report.evswitch),
OPT_BOOLEAN(0, "total-cycles", &report.total_cycles_mode,
"Sort all blocks by 'Sampled Cycles%'"),
OPT_END()
};
struct perf_data data = {
@ -1366,6 +1417,13 @@ int cmd_report(int argc, const char **argv)
goto error;
}
if (report.total_cycles_mode) {
if (sort__mode != SORT_MODE__BRANCH)
report.total_cycles_mode = false;
else
sort_order = NULL;
}
if (strcmp(input_name, "-") != 0)
setup_browser(true);
else
@ -1416,7 +1474,8 @@ int cmd_report(int argc, const char **argv)
* so don't allocate extra space that won't be used in the stdio
* implementation.
*/
if (ui__has_annotation() || report.symbol_ipc) {
if (ui__has_annotation() || report.symbol_ipc ||
report.total_cycles_mode) {
ret = symbol__annotation_init();
if (ret < 0)
goto error;
@ -1477,6 +1536,10 @@ int cmd_report(int argc, const char **argv)
itrace_synth_opts__clear_time_range(&itrace_synth_opts);
zfree(&report.ptime_range);
}
if (report.block_reports)
zfree(&report.block_reports);
zstd_fini(&(session->zstd_data));
perf_session__delete(session);
return ret;

View File

@ -2172,7 +2172,7 @@ static void save_task_callchain(struct perf_sched *sched,
if (node == NULL)
break;
sym = node->sym;
sym = node->ms.sym;
if (sym) {
if (!strcmp(sym->name, "schedule") ||
!strcmp(sym->name, "__schedule") ||

View File

@ -3605,11 +3605,6 @@ int cmd_script(int argc, const char **argv)
}
}
if (script.time_str && reltime) {
fprintf(stderr, "Don't combine --reltime with --time\n");
return -1;
}
if (itrace_synth_opts.callchain &&
itrace_synth_opts.callchain_sz > scripting_max_stack)
scripting_max_stack = itrace_synth_opts.callchain_sz;
@ -3869,10 +3864,11 @@ int cmd_script(int argc, const char **argv)
goto out_delete;
if (script.time_str) {
err = perf_time__parse_for_ranges(script.time_str, session,
err = perf_time__parse_for_ranges_reltime(script.time_str, session,
&script.ptime_range,
&script.range_size,
&script.range_num);
&script.range_num,
reltime);
if (err < 0)
goto out_delete;

View File

@ -792,6 +792,8 @@ static struct option stat_options[] = {
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
"aggregate counts per thread", AGGR_THREAD),
OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
"aggregate counts per numa node", AGGR_NODE),
OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
"ms to wait before starting measurement after program start"),
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
@ -803,6 +805,12 @@ static struct option stat_options[] = {
OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
"monitor specified metrics or metric groups (separated by ,)",
parse_metric_groups),
OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
"Configure all used events to run in kernel space.",
PARSE_OPT_EXCLUSIVE),
OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
"Configure all used events to run in user space.",
PARSE_OPT_EXCLUSIVE),
OPT_END()
};
@ -824,6 +832,12 @@ static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
return cpu_map__get_core(map, cpu, NULL);
}
static int perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int cpu)
{
return cpu_map__get_node(map, cpu, NULL);
}
static int perf_stat__get_aggr(struct perf_stat_config *config,
aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
{
@ -858,6 +872,12 @@ static int perf_stat__get_core_cached(struct perf_stat_config *config,
return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
}
static int perf_stat__get_node_cached(struct perf_stat_config *config,
struct perf_cpu_map *map, int idx)
{
return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
}
static bool term_percore_set(void)
{
struct evsel *counter;
@ -896,6 +916,13 @@ static int perf_stat_init_aggr_mode(void)
}
stat_config.aggr_get_id = perf_stat__get_core_cached;
break;
case AGGR_NODE:
if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_node_cached;
break;
case AGGR_NONE:
if (term_percore_set()) {
if (cpu_map__build_core_map(evsel_list->core.cpus,
@ -1008,6 +1035,13 @@ static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
return core;
}
static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
{
int cpu = perf_env__get_cpu(data, map, idx);
return perf_env__numa_node(data, cpu);
}
static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
struct perf_cpu_map **sockp)
{
@ -1026,6 +1060,12 @@ static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *c
return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
}
static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
struct perf_cpu_map **nodep)
{
return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
}
static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int idx)
{
@ -1043,6 +1083,12 @@ static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unus
return perf_env__get_core(map, idx, &perf_stat.session->header.env);
}
static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int idx)
{
return perf_env__get_node(map, idx, &perf_stat.session->header.env);
}
static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
struct perf_env *env = &st->session->header.env;
@ -1069,6 +1115,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
}
stat_config.aggr_get_id = perf_stat__get_core_file;
break;
case AGGR_NODE:
if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_node_file;
break;
case AGGR_NONE:
case AGGR_GLOBAL:
case AGGR_THREAD:
@ -1254,6 +1307,7 @@ static int add_default_attributes(void)
if (stat_config.null_run)
return 0;
bzero(&errinfo, sizeof(errinfo));
if (transaction_run) {
/* Handle -T as -M transaction. Once platform specific metrics
* support has been added to the json files, all archictures
@ -1311,6 +1365,7 @@ static int add_default_attributes(void)
return -1;
}
if (err) {
parse_events_print_error(&errinfo, smi_cost_attrs);
fprintf(stderr, "Cannot set up SMI cost events\n");
return -1;
}
@ -1616,6 +1671,8 @@ static int __cmd_report(int argc, const char **argv)
"aggregate counts per processor die", AGGR_DIE),
OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
"aggregate counts per numa node", AGGR_NODE),
OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
"disable CPU count aggregation", AGGR_NONE),
OPT_END()
@ -1890,6 +1947,9 @@ int cmd_stat(int argc, const char **argv)
}
}
if (stat_config.aggr_mode == AGGR_NODE)
cpu__setup_cpunode_map();
if (stat_config.times && interval)
interval_count = true;
else if (stat_config.times && !interval) {

View File

@ -82,6 +82,7 @@
#include <linux/err.h>
#include <linux/ctype.h>
#include <perf/mmap.h>
static volatile int done;
static volatile int resize;
@ -142,12 +143,12 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
return err;
}
err = symbol__annotate(sym, map, evsel, 0, &top->annotation_opts, NULL);
err = symbol__annotate(&he->ms, evsel, 0, &top->annotation_opts, NULL);
if (err == 0) {
top->sym_filter_entry = he;
} else {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
symbol__strerror_disassemble(&he->ms, err, msg, sizeof(msg));
pr_err("Couldn't annotate %s: %s\n", sym->name, msg);
}
@ -256,7 +257,7 @@ static void perf_top__show_details(struct perf_top *top)
printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt);
more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, &top->annotation_opts);
more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts);
if (top->evlist->enabled) {
if (top->zero)
@ -724,7 +725,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
!(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
!(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY),
NULL);
return 0;
}
@ -869,10 +871,10 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
union perf_event *event;
md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
if (perf_mmap__read_init(md) < 0)
if (perf_mmap__read_init(&md->core) < 0)
return;
while ((event = perf_mmap__read_event(md)) != NULL) {
while ((event = perf_mmap__read_event(&md->core)) != NULL) {
int ret;
ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
@ -883,7 +885,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
if (ret)
break;
perf_mmap__consume(md);
perf_mmap__consume(&md->core);
if (top->qe.rotate) {
pthread_mutex_lock(&top->qe.mutex);
@ -893,7 +895,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
}
}
perf_mmap__read_done(md);
perf_mmap__read_done(&md->core);
}
static void perf_top__mmap_read(struct perf_top *top)
@ -1560,6 +1562,17 @@ int cmd_top(int argc, const char **argv)
status = perf_config(perf_top_config, &top);
if (status)
return status;
/*
* Since the per arch annotation init routine may need the cpuid, read
* it here, since we are not getting this from the perf.data header.
*/
status = perf_env__read_cpuid(&perf_env);
if (status) {
pr_err("Couldn't read the cpuid for this machine: %s\n",
str_error_r(errno, errbuf, sizeof(errbuf)));
goto out_delete_evlist;
}
top.evlist->env = &perf_env;
argc = parse_options(argc, argv, options, top_usage, 0);
if (argc)

File diff suppressed because it is too large Load Diff

View File

@ -28,6 +28,9 @@ arch/x86/include/asm/disabled-features.h
arch/x86/include/asm/required-features.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/inat_types.h
arch/x86/include/asm/emulate_prefix.h
arch/x86/include/asm/irq_vectors.h
arch/x86/include/asm/msr-index.h
arch/x86/include/uapi/asm/prctl.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
@ -116,7 +119,7 @@ check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B
check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]"'
check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
# diff non-symmetric files
check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl

View File

@ -3,6 +3,7 @@ libperf-y += cpumap.o
libperf-y += threadmap.o
libperf-y += evsel.o
libperf-y += evlist.o
libperf-y += mmap.o
libperf-y += zalloc.o
libperf-y += xyarray.o
libperf-y += lib.o

View File

@ -107,6 +107,7 @@ else
endif
LIBAPI = $(API_PATH)libapi.a
export LIBAPI
$(LIBAPI): FORCE
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
@ -172,8 +173,9 @@ install_headers:
$(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644);
$(call do_install,include/perf/event.h,$(prefix)/include/perf,644);
$(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644);
install_pkgconfig: $(LIBPERF_PC)
$(call QUIET_INSTALL, $(LIBPERF_PC)) \

View File

@ -5,11 +5,12 @@
#include <stdio.h>
#include <stdarg.h>
#include <unistd.h>
#include <linux/compiler.h>
#include <perf/core.h>
#include <internal/lib.h>
#include "internal.h"
static int __base_pr(enum libperf_print_level level, const char *format,
static int __base_pr(enum libperf_print_level level __maybe_unused, const char *format,
va_list args)
{
return vfprintf(stderr, format, args);

View File

@ -8,13 +8,20 @@
#include <internal/evlist.h>
#include <internal/evsel.h>
#include <internal/xyarray.h>
#include <internal/mmap.h>
#include <internal/cpumap.h>
#include <internal/threadmap.h>
#include <internal/xyarray.h>
#include <internal/lib.h>
#include <linux/zalloc.h>
#include <sys/ioctl.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include <poll.h>
#include <sys/mman.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>
@ -27,6 +34,7 @@ void perf_evlist__init(struct perf_evlist *evlist)
INIT_HLIST_HEAD(&evlist->heads[i]);
INIT_LIST_HEAD(&evlist->entries);
evlist->nr_entries = 0;
fdarray__init(&evlist->pollfd, 64);
}
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
@ -101,8 +109,36 @@ perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev)
return next;
}
static void perf_evlist__purge(struct perf_evlist *evlist)
{
struct perf_evsel *pos, *n;
perf_evlist__for_each_entry_safe(evlist, n, pos) {
list_del_init(&pos->node);
perf_evsel__delete(pos);
}
evlist->nr_entries = 0;
}
void perf_evlist__exit(struct perf_evlist *evlist)
{
perf_cpu_map__put(evlist->cpus);
perf_thread_map__put(evlist->threads);
evlist->cpus = NULL;
evlist->threads = NULL;
fdarray__exit(&evlist->pollfd);
}
void perf_evlist__delete(struct perf_evlist *evlist)
{
if (evlist == NULL)
return;
perf_evlist__munmap(evlist);
perf_evlist__close(evlist);
perf_evlist__purge(evlist);
perf_evlist__exit(evlist);
free(evlist);
}
@ -277,7 +313,328 @@ int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
return pos;
}
static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
void *arg __maybe_unused)
{
struct perf_mmap *map = fda->priv[fd].ptr;
if (map)
perf_mmap__put(map);
}
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
return fdarray__filter(&evlist->pollfd, revents_and_mask,
perf_evlist__munmap_filtered, NULL);
}
int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
return fdarray__poll(&evlist->pollfd, timeout);
}
static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite)
{
int i;
struct perf_mmap *map;
map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
if (!map)
return NULL;
for (i = 0; i < evlist->nr_mmaps; i++) {
struct perf_mmap *prev = i ? &map[i - 1] : NULL;
/*
* When the perf_mmap() call is made we grab one refcount, plus
* one extra to let perf_mmap__consume() get the last
* events after all real references (perf_mmap__get()) are
* dropped.
*
* Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
* thus does perf_mmap__get() on it.
*/
perf_mmap__init(&map[i], prev, overwrite, NULL);
}
return map;
}
static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
struct perf_evsel *evsel, int idx, int cpu,
int thread)
{
struct perf_sample_id *sid = SID(evsel, cpu, thread);
sid->idx = idx;
if (evlist->cpus && cpu >= 0)
sid->cpu = evlist->cpus->map[cpu];
else
sid->cpu = -1;
if (!evsel->system_wide && evlist->threads && thread >= 0)
sid->tid = perf_thread_map__pid(evlist->threads, thread);
else
sid->tid = -1;
}
static struct perf_mmap*
perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
{
struct perf_mmap *maps;
maps = overwrite ? evlist->mmap_ovw : evlist->mmap;
if (!maps) {
maps = perf_evlist__alloc_mmap(evlist, overwrite);
if (!maps)
return NULL;
if (overwrite)
evlist->mmap_ovw = maps;
else
evlist->mmap = maps;
}
return &maps[idx];
}
#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
int output, int cpu)
{
return perf_mmap__mmap(map, mp, output, cpu);
}
static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_mmap *map,
bool overwrite)
{
if (overwrite)
evlist->mmap_ovw_first = map;
else
evlist->mmap_first = map;
}
static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
int thread, int *_output, int *_output_overwrite)
{
int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;
perf_evlist__for_each_entry(evlist, evsel) {
bool overwrite = evsel->attr.write_backward;
struct perf_mmap *map;
int *output, fd, cpu;
if (evsel->system_wide && thread)
continue;
cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu);
if (cpu == -1)
continue;
map = ops->get(evlist, overwrite, idx);
if (map == NULL)
return -ENOMEM;
if (overwrite) {
mp->prot = PROT_READ;
output = _output_overwrite;
} else {
mp->prot = PROT_READ | PROT_WRITE;
output = _output;
}
fd = FD(evsel, cpu, thread);
if (*output == -1) {
*output = fd;
/*
* The last one will be done at perf_mmap__consume(), so that we
* make sure we don't prevent tools from consuming every last event in
* the ring buffer.
*
* I.e. we can get the POLLHUP meaning that the fd doesn't exist
* anymore, but the last events for it are still in the ring buffer,
* waiting to be consumed.
*
* Tools can chose to ignore this at their own discretion, but the
* evlist layer can't just drop it when filtering events in
* perf_evlist__filter_pollfd().
*/
refcount_set(&map->refcnt, 2);
if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
return -1;
if (!idx)
perf_evlist__set_mmap_first(evlist, map, overwrite);
} else {
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
return -1;
perf_mmap__get(map);
}
revent = !overwrite ? POLLIN : 0;
if (!evsel->system_wide &&
perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) {
perf_mmap__put(map);
return -1;
}
if (evsel->attr.read_format & PERF_FORMAT_ID) {
if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
fd) < 0)
return -1;
perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
thread);
}
}
return 0;
}
static int
mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
int thread;
int nr_threads = perf_thread_map__nr(evlist->threads);
for (thread = 0; thread < nr_threads; thread++) {
int output = -1;
int output_overwrite = -1;
if (ops->idx)
ops->idx(evlist, mp, thread, false);
if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
&output, &output_overwrite))
goto out_unmap;
}
return 0;
out_unmap:
perf_evlist__munmap(evlist);
return -1;
}
static int
mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
int nr_threads = perf_thread_map__nr(evlist->threads);
int nr_cpus = perf_cpu_map__nr(evlist->cpus);
int cpu, thread;
for (cpu = 0; cpu < nr_cpus; cpu++) {
int output = -1;
int output_overwrite = -1;
if (ops->idx)
ops->idx(evlist, mp, cpu, true);
for (thread = 0; thread < nr_threads; thread++) {
if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
thread, &output, &output_overwrite))
goto out_unmap;
}
}
return 0;
out_unmap:
perf_evlist__munmap(evlist);
return -1;
}
static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
int nr_mmaps;
nr_mmaps = perf_cpu_map__nr(evlist->cpus);
if (perf_cpu_map__empty(evlist->cpus))
nr_mmaps = perf_thread_map__nr(evlist->threads);
return nr_mmaps;
}
int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
struct perf_evsel *evsel;
const struct perf_cpu_map *cpus = evlist->cpus;
const struct perf_thread_map *threads = evlist->threads;
if (!ops || !ops->get || !ops->mmap)
return -EINVAL;
mp->mask = evlist->mmap_len - page_size - 1;
evlist->nr_mmaps = perf_evlist__nr_mmaps(evlist);
perf_evlist__for_each_entry(evlist, evsel) {
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
evsel->sample_id == NULL &&
perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
return -ENOMEM;
}
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
if (perf_cpu_map__empty(cpus))
return mmap_per_thread(evlist, ops, mp);
return mmap_per_cpu(evlist, ops, mp);
}
int perf_evlist__mmap(struct perf_evlist *evlist, int pages)
{
struct perf_mmap_param mp;
struct perf_evlist_mmap_ops ops = {
.get = perf_evlist__mmap_cb_get,
.mmap = perf_evlist__mmap_cb_mmap,
};
evlist->mmap_len = (pages + 1) * page_size;
return perf_evlist__mmap_ops(evlist, &ops, &mp);
}
void perf_evlist__munmap(struct perf_evlist *evlist)
{
int i;
if (evlist->mmap) {
for (i = 0; i < evlist->nr_mmaps; i++)
perf_mmap__munmap(&evlist->mmap[i]);
}
if (evlist->mmap_ovw) {
for (i = 0; i < evlist->nr_mmaps; i++)
perf_mmap__munmap(&evlist->mmap_ovw[i]);
}
zfree(&evlist->mmap);
zfree(&evlist->mmap_ovw);
}
struct perf_mmap*
perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
bool overwrite)
{
if (map)
return map->next;
return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}

View File

@ -120,7 +120,8 @@ void perf_evsel__close_fd(struct perf_evsel *evsel)
for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
close(FD(evsel, cpu, thread));
if (FD(evsel, cpu, thread) >= 0)
close(FD(evsel, cpu, thread));
FD(evsel, cpu, thread) = -1;
}
}

View File

@ -11,6 +11,7 @@
struct perf_cpu_map;
struct perf_thread_map;
struct perf_mmap_param;
struct perf_evlist {
struct list_head entries;
@ -22,12 +23,36 @@ struct perf_evlist {
size_t mmap_len;
struct fdarray pollfd;
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
struct perf_mmap *mmap;
struct perf_mmap *mmap_ovw;
struct perf_mmap *mmap_first;
struct perf_mmap *mmap_ovw_first;
};
typedef void
(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool);
typedef struct perf_mmap*
(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
typedef int
(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
struct perf_evlist_mmap_ops {
perf_evlist_mmap__cb_idx_t idx;
perf_evlist_mmap__cb_get_t get;
perf_evlist_mmap__cb_mmap_t mmap;
};
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
void *ptr, short revent);
int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp);
void perf_evlist__init(struct perf_evlist *evlist);
void perf_evlist__exit(struct perf_evlist *evlist);
/**
* __perf_evlist__for_each_entry - iterate thru all the evsels
* @list: list_head instance to iterate
@ -60,6 +85,24 @@ int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
#define perf_evlist__for_each_entry_reverse(evlist, evsel) \
__perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel)
/**
* __perf_evlist__for_each_entry_safe - safely iterate thru all the evsels
* @list: list_head instance to iterate
* @tmp: struct evsel temp iterator
* @evsel: struct evsel iterator
*/
#define __perf_evlist__for_each_entry_safe(list, tmp, evsel) \
list_for_each_entry_safe(evsel, tmp, list, node)
/**
* perf_evlist__for_each_entry_safe - safely iterate thru all the evsels
* @evlist: evlist instance to iterate
* @evsel: struct evsel iterator
* @tmp: struct evsel temp iterator
*/
#define perf_evlist__for_each_entry_safe(evlist, tmp, evsel) \
__perf_evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel)
static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
{
return list_entry(evlist->entries.next, struct perf_evsel, node);

View File

@ -50,6 +50,7 @@ struct perf_evsel {
bool system_wide;
};
void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
void perf_evsel__close_fd(struct perf_evsel *evsel);
void perf_evsel__free_fd(struct perf_evsel *evsel);

View File

@ -10,23 +10,46 @@
/* perf sample has 16 bits size limit */
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
struct perf_mmap;
typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
/**
* struct perf_mmap - perf's ring buffer mmap details
*
* @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
*/
struct perf_mmap {
void *base;
int mask;
int fd;
int cpu;
refcount_t refcnt;
u64 prev;
u64 start;
u64 end;
bool overwrite;
u64 flush;
char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
void *base;
int mask;
int fd;
int cpu;
refcount_t refcnt;
u64 prev;
u64 start;
u64 end;
bool overwrite;
u64 flush;
libperf_unmap_cb_t unmap_cb;
char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
struct perf_mmap *next;
};
struct perf_mmap_param {
int prot;
int mask;
};
size_t perf_mmap__mmap_len(struct perf_mmap *map);
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
bool overwrite, libperf_unmap_cb_t unmap_cb);
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
int fd, int cpu);
void perf_mmap__munmap(struct perf_mmap *map);
void perf_mmap__get(struct perf_mmap *map);
void perf_mmap__put(struct perf_mmap *map);
u64 perf_mmap__read_head(struct perf_mmap *map);
#endif /* __LIBPERF_INTERNAL_MMAP_H */

View File

@ -4,14 +4,28 @@
#include <stdio.h>
#define __T_START fprintf(stdout, "- running %s...", __FILE__)
#define __T_OK fprintf(stdout, "OK\n")
#define __T_FAIL fprintf(stdout, "FAIL\n")
int tests_failed;
#define __T_START \
do { \
fprintf(stdout, "- running %s...", __FILE__); \
fflush(NULL); \
tests_failed = 0; \
} while (0)
#define __T_END \
do { \
if (tests_failed) \
fprintf(stdout, " FAILED (%d)\n", tests_failed); \
else \
fprintf(stdout, "OK\n"); \
} while (0)
#define __T(text, cond) \
do { \
if (!(cond)) { \
fprintf(stderr, "FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
tests_failed++; \
return -1; \
} \
} while (0)

View File

@ -9,9 +9,12 @@
#endif
enum libperf_print_level {
LIBPERF_ERR,
LIBPERF_WARN,
LIBPERF_INFO,
LIBPERF_DEBUG,
LIBPERF_DEBUG2,
LIBPERF_DEBUG3,
};
typedef int (*libperf_print_fn_t)(enum libperf_print_level level,

Some files were not shown because too many files have changed in this diff Show More