mirror of https://gitee.com/openkylin/linux.git
Merge branches 'oprofile-v2' and 'timers/hpet' into x86/core-v4
This commit is contained in:
commit
c00193f9f0
|
@ -159,8 +159,6 @@ hayes-esp.txt
|
|||
- info on using the Hayes ESP serial driver.
|
||||
highuid.txt
|
||||
- notes on the change from 16 bit to 32 bit user/group IDs.
|
||||
hpet.txt
|
||||
- High Precision Event Timer Driver for Linux.
|
||||
timers/
|
||||
- info on the timer related topics
|
||||
hw_random.txt
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
00-INDEX
|
||||
- this file
|
||||
highres.txt
|
||||
- High resolution timers and dynamic ticks design notes
|
||||
hpet.txt
|
||||
- High Precision Event Timer Driver for Linux
|
||||
hrtimers.txt
|
||||
- subsystem for high-resolution kernel timers
|
||||
timer_stats.txt
|
||||
- timer usage statistics
|
|
@ -1,21 +1,32 @@
|
|||
High Precision Event Timer Driver for Linux
|
||||
|
||||
The High Precision Event Timer (HPET) hardware is the future replacement
|
||||
for the 8254 and Real Time Clock (RTC) periodic timer functionality.
|
||||
Each HPET can have up to 32 timers. It is possible to configure the
|
||||
first two timers as legacy replacements for 8254 and RTC periodic timers.
|
||||
A specification done by Intel and Microsoft can be found at
|
||||
<http://www.intel.com/technology/architecture/hpetspec.htm>.
|
||||
The High Precision Event Timer (HPET) hardware follows a specification
|
||||
by Intel and Microsoft which can be found at
|
||||
|
||||
http://www.intel.com/technology/architecture/hpetspec.htm
|
||||
|
||||
Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision")
|
||||
and up to 32 comparators. Normally three or more comparators are provided,
|
||||
each of which can generate oneshot interrupts and at least one of which has
|
||||
additional hardware to support periodic interrupts. The comparators are
|
||||
also called "timers", which can be misleading since usually timers are
|
||||
independent of each other ... these share a counter, complicating resets.
|
||||
|
||||
HPET devices can support two interrupt routing modes. In one mode, the
|
||||
comparators are additional interrupt sources with no particular system
|
||||
role. Many x86 BIOS writers don't route HPET interrupts at all, which
|
||||
prevents use of that mode. They support the other "legacy replacement"
|
||||
mode where the first two comparators block interrupts from 8254 timers
|
||||
and from the RTC.
|
||||
|
||||
The driver supports detection of HPET driver allocation and initialization
|
||||
of the HPET before the driver module_init routine is called. This enables
|
||||
platform code which uses timer 0 or 1 as the main timer to intercept HPET
|
||||
initialization. An example of this initialization can be found in
|
||||
arch/i386/kernel/time_hpet.c.
|
||||
arch/x86/kernel/hpet.c.
|
||||
|
||||
The driver provides two APIs which are very similar to the API found in
|
||||
the rtc.c driver. There is a user space API and a kernel space API.
|
||||
An example user space program is provided below.
|
||||
The driver provides a userspace API which resembles the API found in the
|
||||
RTC driver framework. An example user space program is provided below.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -286,15 +297,3 @@ out:
|
|||
|
||||
return;
|
||||
}
|
||||
|
||||
The kernel API has three interfaces exported from the driver:
|
||||
|
||||
hpet_register(struct hpet_task *tp, int periodic)
|
||||
hpet_unregister(struct hpet_task *tp)
|
||||
hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg)
|
||||
|
||||
The kernel module using this interface fills in the ht_func and ht_data
|
||||
members of the hpet_task structure before calling hpet_register.
|
||||
hpet_control simply vectors to the hpet_ioctl routine and has the same
|
||||
commands and respective arguments as the user API. hpet_unregister
|
||||
is used to terminate usage of the HPET timer reserved by hpet_register.
|
14
arch/Kconfig
14
arch/Kconfig
|
@ -13,6 +13,20 @@ config OPROFILE
|
|||
|
||||
If unsure, say N.
|
||||
|
||||
config OPROFILE_IBS
|
||||
bool "OProfile AMD IBS support (EXPERIMENTAL)"
|
||||
default n
|
||||
depends on OPROFILE && SMP && X86
|
||||
help
|
||||
Instruction-Based Sampling (IBS) is a new profiling
|
||||
technique that provides rich, precise program performance
|
||||
information. IBS is introduced by AMD Family10h processors
|
||||
(AMD Opteron Quad-Core processor “Barcelona”) to overcome
|
||||
the limitations of conventional performance counter
|
||||
sampling.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config HAVE_OPROFILE
|
||||
def_bool n
|
||||
|
||||
|
|
|
@ -295,6 +295,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
|
|||
*
|
||||
* Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
|
||||
* MCE interrupts are supported. Thus MCE offset must be set to 0.
|
||||
*
|
||||
* If mask=1, the LVT entry does not generate interrupts while mask=0
|
||||
* enables the vector. See also the BKDGs.
|
||||
*/
|
||||
|
||||
#define APIC_EILVT_LVTOFF_MCE 0
|
||||
|
@ -319,6 +322,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
|
|||
setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
|
||||
return APIC_EILVT_LVTOFF_IBS;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
|
||||
|
||||
/*
|
||||
* Program the next event, relative to now
|
||||
|
|
|
@ -307,6 +307,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
|
|||
*
|
||||
* Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
|
||||
* MCE interrupts are supported. Thus MCE offset must be set to 0.
|
||||
*
|
||||
* If mask=1, the LVT entry does not generate interrupts while mask=0
|
||||
* enables the vector. See also the BKDGs.
|
||||
*/
|
||||
|
||||
#define APIC_EILVT_LVTOFF_MCE 0
|
||||
|
@ -331,6 +334,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
|
|||
setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
|
||||
return APIC_EILVT_LVTOFF_IBS;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
|
||||
|
||||
/*
|
||||
* Program the next event, relative to now
|
||||
|
|
|
@ -115,13 +115,17 @@ static void hpet_reserve_platform_timers(unsigned long id)
|
|||
hd.hd_phys_address = hpet_address;
|
||||
hd.hd_address = hpet;
|
||||
hd.hd_nirqs = nrtimers;
|
||||
hd.hd_flags = HPET_DATA_PLATFORM;
|
||||
hpet_reserve_timer(&hd, 0);
|
||||
|
||||
#ifdef CONFIG_HPET_EMULATE_RTC
|
||||
hpet_reserve_timer(&hd, 1);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254
|
||||
* is wrong for i8259!) not the output IRQ. Many BIOS writers
|
||||
* don't bother configuring *any* comparator interrupts.
|
||||
*/
|
||||
hd.hd_irq[0] = HPET_LEGACY_8254;
|
||||
hd.hd_irq[1] = HPET_LEGACY_RTC;
|
||||
|
||||
|
|
|
@ -354,9 +354,27 @@ static void ati_force_hpet_resume(void)
|
|||
printk(KERN_DEBUG "Force enabled HPET at resume\n");
|
||||
}
|
||||
|
||||
static u32 ati_ixp4x0_rev(struct pci_dev *dev)
|
||||
{
|
||||
u32 d;
|
||||
u8 b;
|
||||
|
||||
pci_read_config_byte(dev, 0xac, &b);
|
||||
b &= ~(1<<5);
|
||||
pci_write_config_byte(dev, 0xac, b);
|
||||
pci_read_config_dword(dev, 0x70, &d);
|
||||
d |= 1<<8;
|
||||
pci_write_config_dword(dev, 0x70, d);
|
||||
pci_read_config_dword(dev, 0x8, &d);
|
||||
d &= 0xff;
|
||||
dev_printk(KERN_DEBUG, &dev->dev, "SB4X0 revision 0x%x\n", d);
|
||||
return d;
|
||||
}
|
||||
|
||||
static void ati_force_enable_hpet(struct pci_dev *dev)
|
||||
{
|
||||
u32 uninitialized_var(val);
|
||||
u32 d, val;
|
||||
u8 b;
|
||||
|
||||
if (hpet_address || force_hpet_address)
|
||||
return;
|
||||
|
@ -366,14 +384,33 @@ static void ati_force_enable_hpet(struct pci_dev *dev)
|
|||
return;
|
||||
}
|
||||
|
||||
d = ati_ixp4x0_rev(dev);
|
||||
if (d < 0x82)
|
||||
return;
|
||||
|
||||
/* base address */
|
||||
pci_write_config_dword(dev, 0x14, 0xfed00000);
|
||||
pci_read_config_dword(dev, 0x14, &val);
|
||||
|
||||
/* enable interrupt */
|
||||
outb(0x72, 0xcd6); b = inb(0xcd7);
|
||||
b |= 0x1;
|
||||
outb(0x72, 0xcd6); outb(b, 0xcd7);
|
||||
outb(0x72, 0xcd6); b = inb(0xcd7);
|
||||
if (!(b & 0x1))
|
||||
return;
|
||||
pci_read_config_dword(dev, 0x64, &d);
|
||||
d |= (1<<10);
|
||||
pci_write_config_dword(dev, 0x64, d);
|
||||
pci_read_config_dword(dev, 0x64, &d);
|
||||
if (!(d & (1<<10)))
|
||||
return;
|
||||
|
||||
force_hpet_address = val;
|
||||
force_hpet_resume_type = ATI_FORCE_HPET_RESUME;
|
||||
dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n",
|
||||
force_hpet_address);
|
||||
cached_dev = dev;
|
||||
return;
|
||||
}
|
||||
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
|
||||
ati_force_enable_hpet);
|
||||
|
|
|
@ -7,6 +7,6 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
|
|||
timer_int.o )
|
||||
|
||||
oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
|
||||
oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \
|
||||
oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
|
||||
op_model_ppro.o op_model_p4.o
|
||||
oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
/**
|
||||
* @file nmi_int.c
|
||||
*
|
||||
* @remark Copyright 2002 OProfile authors
|
||||
* @remark Copyright 2002-2008 OProfile authors
|
||||
* @remark Read the file COPYING
|
||||
*
|
||||
* @author John Levon <levon@movementarian.org>
|
||||
* @author Robert Richter <robert.richter@amd.com>
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
|
@ -439,6 +440,7 @@ int __init op_nmi_init(struct oprofile_operations *ops)
|
|||
__u8 vendor = boot_cpu_data.x86_vendor;
|
||||
__u8 family = boot_cpu_data.x86;
|
||||
char *cpu_type;
|
||||
int ret = 0;
|
||||
|
||||
if (!cpu_has_apic)
|
||||
return -ENODEV;
|
||||
|
@ -451,19 +453,23 @@ int __init op_nmi_init(struct oprofile_operations *ops)
|
|||
default:
|
||||
return -ENODEV;
|
||||
case 6:
|
||||
model = &op_athlon_spec;
|
||||
model = &op_amd_spec;
|
||||
cpu_type = "i386/athlon";
|
||||
break;
|
||||
case 0xf:
|
||||
model = &op_athlon_spec;
|
||||
model = &op_amd_spec;
|
||||
/* Actually it could be i386/hammer too, but give
|
||||
user space a consistent name. */
|
||||
cpu_type = "x86-64/hammer";
|
||||
break;
|
||||
case 0x10:
|
||||
model = &op_athlon_spec;
|
||||
model = &op_amd_spec;
|
||||
cpu_type = "x86-64/family10";
|
||||
break;
|
||||
case 0x11:
|
||||
model = &op_amd_spec;
|
||||
cpu_type = "x86-64/family11h";
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -490,17 +496,24 @@ int __init op_nmi_init(struct oprofile_operations *ops)
|
|||
return -ENODEV;
|
||||
}
|
||||
|
||||
init_sysfs();
|
||||
#ifdef CONFIG_SMP
|
||||
register_cpu_notifier(&oprofile_cpu_nb);
|
||||
#endif
|
||||
using_nmi = 1;
|
||||
/* default values, can be overwritten by model */
|
||||
ops->create_files = nmi_create_files;
|
||||
ops->setup = nmi_setup;
|
||||
ops->shutdown = nmi_shutdown;
|
||||
ops->start = nmi_start;
|
||||
ops->stop = nmi_stop;
|
||||
ops->cpu_type = cpu_type;
|
||||
|
||||
if (model->init)
|
||||
ret = model->init(ops);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
init_sysfs();
|
||||
using_nmi = 1;
|
||||
printk(KERN_INFO "oprofile: using NMI interrupt.\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -513,4 +526,6 @@ void op_nmi_exit(void)
|
|||
unregister_cpu_notifier(&oprofile_cpu_nb);
|
||||
#endif
|
||||
}
|
||||
if (model->exit)
|
||||
model->exit();
|
||||
}
|
||||
|
|
|
@ -0,0 +1,543 @@
|
|||
/*
|
||||
* @file op_model_amd.c
|
||||
* athlon / K7 / K8 / Family 10h model-specific MSR operations
|
||||
*
|
||||
* @remark Copyright 2002-2008 OProfile authors
|
||||
* @remark Read the file COPYING
|
||||
*
|
||||
* @author John Levon
|
||||
* @author Philippe Elie
|
||||
* @author Graydon Hoare
|
||||
* @author Robert Richter <robert.richter@amd.com>
|
||||
* @author Barry Kasindorf
|
||||
*/
|
||||
|
||||
#include <linux/oprofile.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/pci.h>
|
||||
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/nmi.h>
|
||||
|
||||
#include "op_x86_model.h"
|
||||
#include "op_counter.h"
|
||||
|
||||
/* Number of counter and control (event select) MSRs handled by this model */
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4

/*
 * Counter MSR accessors.  A counter whose recorded address is 0 was not
 * reserved.  CTR_WRITE() stores the negated count with the upper word set
 * to all ones; CTR_OVERFLOWED() reports true once bit 31 of the low word
 * is clear.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. "a | b") are evaluated as a unit.
 */
#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l, h, msrs, c) do {rdmsr((msrs)->counters[(c)].addr, (l), (h)); } while (0)
#define CTR_WRITE(l, msrs, c) do {wrmsr((msrs)->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

/* Control (event select) MSR accessors; address 0 again means "not reserved" */
#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs)->controls[(c)].addr, (l), (h)); } while (0)
#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs)->controls[(c)].addr, (l), (h)); } while (0)

/* Bit 22 of the low word starts/stops counting */
#define CTRL_SET_ACTIVE(n) ((n) |= (1<<22))
#define CTRL_SET_INACTIVE(n) ((n) &= ~(1<<22))
/* Keep only bit 21 of the low word; everything else is reprogrammed */
#define CTRL_CLEAR_LO(x) ((x) &= (1<<21))
/* Clear bits 0-3 and 8-9 of the high word (the fields set below) */
#define CTRL_CLEAR_HI(x) ((x) &= 0xfffffcf0)
#define CTRL_SET_ENABLE(val) ((val) |= 1<<20)
#define CTRL_SET_USR(val, u) ((val) |= (((u) & 1) << 16))
#define CTRL_SET_KERN(val, k) ((val) |= (((k) & 1) << 17))
#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))
#define CTRL_SET_EVENT_LOW(val, e) ((val) |= ((e) & 0xff))
#define CTRL_SET_EVENT_HIGH(val, e) ((val) |= (((e) >> 8) & 0xf))
#define CTRL_SET_HOST_ONLY(val, h) ((val) |= (((h) & 1) << 9))
#define CTRL_SET_GUEST_ONLY(val, h) ((val) |= (((h) & 1) << 8))

/* Per-counter reload value; 0 marks a counter that is not in use */
static unsigned long reset_value[NUM_COUNTERS];
|
||||
|
||||
#ifdef CONFIG_OPROFILE_IBS
|
||||
|
||||
/* IbsFetchCtl bits/masks */
|
||||
#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */
|
||||
#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */
|
||||
#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */
|
||||
|
||||
/*IbsOpCtl bits */
|
||||
#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */
|
||||
#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */
|
||||
|
||||
/* Codes used in cpu_buffer.c */
|
||||
/* This produces duplicate code, needs to be fixed */
|
||||
#define IBS_FETCH_BEGIN 3
|
||||
#define IBS_OP_BEGIN 4
|
||||
|
||||
/* The function interface needs to be fixed, something like add
|
||||
data. Should then be added to linux/oprofile.h. */
|
||||
extern void oprofile_add_ibs_sample(struct pt_regs *const regs,
|
||||
unsigned int * const ibs_sample, u8 code);
|
||||
|
||||
/*
 * One IBS fetch sample: the low and high 32-bit halves of three MSRs.
 * The record is handed to oprofile_add_ibs_sample() as a plain array of
 * unsigned int, so the field order must not change.
 */
struct ibs_fetch_sample {
	/* IBS Fetch Linear Address Register (MSRC001_1031) */
	unsigned int ibs_fetch_lin_addr_low;
	unsigned int ibs_fetch_lin_addr_high;

	/* IBS Fetch Control Register (MSRC001_1030) */
	unsigned int ibs_fetch_ctl_low;
	unsigned int ibs_fetch_ctl_high;

	/* IBS Fetch Physical Address Register (MSRC001_1032) */
	unsigned int ibs_fetch_phys_addr_low;
	unsigned int ibs_fetch_phys_addr_high;
};
|
||||
|
||||
/*
 * One IBS op sample: the low and high 32-bit halves of six MSRs.
 * The record is handed to oprofile_add_ibs_sample() as a plain array of
 * unsigned int, so the field order must not change.
 */
struct ibs_op_sample {
	/* IBS Op Logical Address Register, IbsRIP (MSRC001_1034) */
	unsigned int ibs_op_rip_low;
	unsigned int ibs_op_rip_high;

	/* IBS Op Data Register (MSRC001_1035) */
	unsigned int ibs_op_data1_low;
	unsigned int ibs_op_data1_high;

	/* IBS Op Data 2 Register (MSRC001_1036) */
	unsigned int ibs_op_data2_low;
	unsigned int ibs_op_data2_high;

	/* IBS Op Data 3 Register (MSRC001_1037) */
	unsigned int ibs_op_data3_low;
	unsigned int ibs_op_data3_high;

	/* IBS DC Linear Address Register, IbsDcLinAd (MSRC001_1038) */
	unsigned int ibs_dc_linear_low;
	unsigned int ibs_dc_linear_high;

	/* IBS DC Physical Address Register, IbsDcPhysAd (MSRC001_1039) */
	unsigned int ibs_dc_phys_low;
	unsigned int ibs_dc_phys_high;
};
|
||||
|
||||
/*
|
||||
* uninitialize the APIC for the IBS interrupts if needed on AMD Family10h+
|
||||
*/
|
||||
static void clear_ibs_nmi(void);
|
||||
|
||||
static int ibs_allowed; /* AMD Family10h and later */
|
||||
|
||||
/*
 * IBS settings.  Each member is an unsigned long because the members are
 * exported individually through oprofilefs_create_ulong().
 */
struct op_ibs_config {
	unsigned long op_enabled;	/* non-zero: IBS op sampling on */
	unsigned long fetch_enabled;	/* non-zero: IBS fetch sampling on */
	unsigned long max_cnt_fetch;	/* fetch max count (used >> 4) */
	unsigned long max_cnt_op;	/* op max count (used >> 4) */
	unsigned long rand_en;		/* exported as "rand_enable" */
	unsigned long dispatched_ops;	/* exported as "dispatched_ops" */
};

/* The single, file-local IBS configuration instance */
static struct op_ibs_config ibs_config;
|
||||
|
||||
#endif
|
||||
|
||||
/* functions for op_amd_spec */
|
||||
|
||||
static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_COUNTERS; i++) {
|
||||
if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
|
||||
msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
|
||||
else
|
||||
msrs->counters[i].addr = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM_CONTROLS; i++) {
|
||||
if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
|
||||
msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
|
||||
else
|
||||
msrs->controls[i].addr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
|
||||
{
|
||||
unsigned int low, high;
|
||||
int i;
|
||||
|
||||
/* clear all counters */
|
||||
for (i = 0 ; i < NUM_CONTROLS; ++i) {
|
||||
if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
|
||||
continue;
|
||||
CTRL_READ(low, high, msrs, i);
|
||||
CTRL_CLEAR_LO(low);
|
||||
CTRL_CLEAR_HI(high);
|
||||
CTRL_WRITE(low, high, msrs, i);
|
||||
}
|
||||
|
||||
/* avoid a false detection of ctr overflows in NMI handler */
|
||||
for (i = 0; i < NUM_COUNTERS; ++i) {
|
||||
if (unlikely(!CTR_IS_RESERVED(msrs, i)))
|
||||
continue;
|
||||
CTR_WRITE(1, msrs, i);
|
||||
}
|
||||
|
||||
/* enable active counters */
|
||||
for (i = 0; i < NUM_COUNTERS; ++i) {
|
||||
if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
|
||||
reset_value[i] = counter_config[i].count;
|
||||
|
||||
CTR_WRITE(counter_config[i].count, msrs, i);
|
||||
|
||||
CTRL_READ(low, high, msrs, i);
|
||||
CTRL_CLEAR_LO(low);
|
||||
CTRL_CLEAR_HI(high);
|
||||
CTRL_SET_ENABLE(low);
|
||||
CTRL_SET_USR(low, counter_config[i].user);
|
||||
CTRL_SET_KERN(low, counter_config[i].kernel);
|
||||
CTRL_SET_UM(low, counter_config[i].unit_mask);
|
||||
CTRL_SET_EVENT_LOW(low, counter_config[i].event);
|
||||
CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
|
||||
CTRL_SET_HOST_ONLY(high, 0);
|
||||
CTRL_SET_GUEST_ONLY(high, 0);
|
||||
|
||||
CTRL_WRITE(low, high, msrs, i);
|
||||
} else {
|
||||
reset_value[i] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_OPROFILE_IBS
|
||||
|
||||
static inline int
|
||||
op_amd_handle_ibs(struct pt_regs * const regs,
|
||||
struct op_msrs const * const msrs)
|
||||
{
|
||||
unsigned int low, high;
|
||||
struct ibs_fetch_sample ibs_fetch;
|
||||
struct ibs_op_sample ibs_op;
|
||||
|
||||
if (!ibs_allowed)
|
||||
return 1;
|
||||
|
||||
if (ibs_config.fetch_enabled) {
|
||||
rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
|
||||
if (high & IBS_FETCH_HIGH_VALID_BIT) {
|
||||
ibs_fetch.ibs_fetch_ctl_high = high;
|
||||
ibs_fetch.ibs_fetch_ctl_low = low;
|
||||
rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high);
|
||||
ibs_fetch.ibs_fetch_lin_addr_high = high;
|
||||
ibs_fetch.ibs_fetch_lin_addr_low = low;
|
||||
rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high);
|
||||
ibs_fetch.ibs_fetch_phys_addr_high = high;
|
||||
ibs_fetch.ibs_fetch_phys_addr_low = low;
|
||||
|
||||
oprofile_add_ibs_sample(regs,
|
||||
(unsigned int *)&ibs_fetch,
|
||||
IBS_FETCH_BEGIN);
|
||||
|
||||
/*reenable the IRQ */
|
||||
rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
|
||||
high &= ~IBS_FETCH_HIGH_VALID_BIT;
|
||||
high |= IBS_FETCH_HIGH_ENABLE;
|
||||
low &= IBS_FETCH_LOW_MAX_CNT_MASK;
|
||||
wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
|
||||
}
|
||||
}
|
||||
|
||||
if (ibs_config.op_enabled) {
|
||||
rdmsr(MSR_AMD64_IBSOPCTL, low, high);
|
||||
if (low & IBS_OP_LOW_VALID_BIT) {
|
||||
rdmsr(MSR_AMD64_IBSOPRIP, low, high);
|
||||
ibs_op.ibs_op_rip_low = low;
|
||||
ibs_op.ibs_op_rip_high = high;
|
||||
rdmsr(MSR_AMD64_IBSOPDATA, low, high);
|
||||
ibs_op.ibs_op_data1_low = low;
|
||||
ibs_op.ibs_op_data1_high = high;
|
||||
rdmsr(MSR_AMD64_IBSOPDATA2, low, high);
|
||||
ibs_op.ibs_op_data2_low = low;
|
||||
ibs_op.ibs_op_data2_high = high;
|
||||
rdmsr(MSR_AMD64_IBSOPDATA3, low, high);
|
||||
ibs_op.ibs_op_data3_low = low;
|
||||
ibs_op.ibs_op_data3_high = high;
|
||||
rdmsr(MSR_AMD64_IBSDCLINAD, low, high);
|
||||
ibs_op.ibs_dc_linear_low = low;
|
||||
ibs_op.ibs_dc_linear_high = high;
|
||||
rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high);
|
||||
ibs_op.ibs_dc_phys_low = low;
|
||||
ibs_op.ibs_dc_phys_high = high;
|
||||
|
||||
/* reenable the IRQ */
|
||||
oprofile_add_ibs_sample(regs,
|
||||
(unsigned int *)&ibs_op,
|
||||
IBS_OP_BEGIN);
|
||||
rdmsr(MSR_AMD64_IBSOPCTL, low, high);
|
||||
high = 0;
|
||||
low &= ~IBS_OP_LOW_VALID_BIT;
|
||||
low |= IBS_OP_LOW_ENABLE;
|
||||
wrmsr(MSR_AMD64_IBSOPCTL, low, high);
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static int op_amd_check_ctrs(struct pt_regs * const regs,
|
||||
struct op_msrs const * const msrs)
|
||||
{
|
||||
unsigned int low, high;
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < NUM_COUNTERS; ++i) {
|
||||
if (!reset_value[i])
|
||||
continue;
|
||||
CTR_READ(low, high, msrs, i);
|
||||
if (CTR_OVERFLOWED(low)) {
|
||||
oprofile_add_sample(regs, i);
|
||||
CTR_WRITE(reset_value[i], msrs, i);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_OPROFILE_IBS
|
||||
op_amd_handle_ibs(regs, msrs);
|
||||
#endif
|
||||
|
||||
/* See op_model_ppro.c */
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void op_amd_start(struct op_msrs const * const msrs)
|
||||
{
|
||||
unsigned int low, high;
|
||||
int i;
|
||||
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
|
||||
if (reset_value[i]) {
|
||||
CTRL_READ(low, high, msrs, i);
|
||||
CTRL_SET_ACTIVE(low);
|
||||
CTRL_WRITE(low, high, msrs, i);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_OPROFILE_IBS
|
||||
if (ibs_allowed && ibs_config.fetch_enabled) {
|
||||
low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
|
||||
high = IBS_FETCH_HIGH_ENABLE;
|
||||
wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
|
||||
}
|
||||
|
||||
if (ibs_allowed && ibs_config.op_enabled) {
|
||||
low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_LOW_ENABLE;
|
||||
high = 0;
|
||||
wrmsr(MSR_AMD64_IBSOPCTL, low, high);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static void op_amd_stop(struct op_msrs const * const msrs)
|
||||
{
|
||||
unsigned int low, high;
|
||||
int i;
|
||||
|
||||
/* Subtle: stop on all counters to avoid race with
|
||||
* setting our pm callback */
|
||||
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
|
||||
if (!reset_value[i])
|
||||
continue;
|
||||
CTRL_READ(low, high, msrs, i);
|
||||
CTRL_SET_INACTIVE(low);
|
||||
CTRL_WRITE(low, high, msrs, i);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_OPROFILE_IBS
|
||||
if (ibs_allowed && ibs_config.fetch_enabled) {
|
||||
low = 0; /* clear max count and enable */
|
||||
high = 0;
|
||||
wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
|
||||
}
|
||||
|
||||
if (ibs_allowed && ibs_config.op_enabled) {
|
||||
low = 0; /* clear max count and enable */
|
||||
high = 0;
|
||||
wrmsr(MSR_AMD64_IBSOPCTL, low, high);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void op_amd_shutdown(struct op_msrs const * const msrs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
|
||||
if (CTR_IS_RESERVED(msrs, i))
|
||||
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
|
||||
}
|
||||
for (i = 0 ; i < NUM_CONTROLS ; ++i) {
|
||||
if (CTRL_IS_RESERVED(msrs, i))
|
||||
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef CONFIG_OPROFILE_IBS
|
||||
|
||||
/* no IBS support */
|
||||
|
||||
/*
 * Model init hook for builds without CONFIG_OPROFILE_IBS: there is
 * nothing to set up, so it leaves @ops untouched and reports success.
 */
static int op_amd_init(struct oprofile_operations *ops)
{
	return 0;
}

/* Model exit hook for builds without CONFIG_OPROFILE_IBS: nothing to undo. */
static void op_amd_exit(void)
{
}
|
||||
|
||||
#else
|
||||
|
||||
static u8 ibs_eilvt_off;
|
||||
|
||||
static inline void apic_init_ibs_nmi_per_cpu(void *arg)
|
||||
{
|
||||
ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0);
|
||||
}
|
||||
|
||||
static inline void apic_clear_ibs_nmi_per_cpu(void *arg)
|
||||
{
|
||||
setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1);
|
||||
}
|
||||
|
||||
static int pfm_amd64_setup_eilvt(void)
|
||||
{
|
||||
#define IBSCTL_LVTOFFSETVAL (1 << 8)
|
||||
#define IBSCTL 0x1cc
|
||||
struct pci_dev *cpu_cfg;
|
||||
int nodes;
|
||||
u32 value = 0;
|
||||
|
||||
/* per CPU setup */
|
||||
on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1);
|
||||
|
||||
nodes = 0;
|
||||
cpu_cfg = NULL;
|
||||
do {
|
||||
cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
|
||||
PCI_DEVICE_ID_AMD_10H_NB_MISC,
|
||||
cpu_cfg);
|
||||
if (!cpu_cfg)
|
||||
break;
|
||||
++nodes;
|
||||
pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
|
||||
| IBSCTL_LVTOFFSETVAL);
|
||||
pci_read_config_dword(cpu_cfg, IBSCTL, &value);
|
||||
if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) {
|
||||
printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
|
||||
"IBSCTL = 0x%08x", value);
|
||||
return 1;
|
||||
}
|
||||
} while (1);
|
||||
|
||||
if (!nodes) {
|
||||
printk(KERN_DEBUG "No CPU node configured for IBS");
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
/* Sanity check */
|
||||
/* Works only for 64bit with proper numa implementation. */
|
||||
if (nodes != num_possible_nodes()) {
|
||||
printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, "
|
||||
"found: %d, expected %d",
|
||||
nodes, num_possible_nodes());
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* initialize the APIC for the IBS interrupts
|
||||
* if available (AMD Family10h rev B0 and later)
|
||||
*/
|
||||
static void setup_ibs(void)
|
||||
{
|
||||
ibs_allowed = boot_cpu_has(X86_FEATURE_IBS);
|
||||
|
||||
if (!ibs_allowed)
|
||||
return;
|
||||
|
||||
if (pfm_amd64_setup_eilvt()) {
|
||||
ibs_allowed = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
printk(KERN_INFO "oprofile: AMD IBS detected\n");
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* unitialize the APIC for the IBS interrupts if needed on AMD Family10h
|
||||
* rev B0 and later */
|
||||
static void clear_ibs_nmi(void)
|
||||
{
|
||||
if (ibs_allowed)
|
||||
on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
|
||||
}
|
||||
|
||||
static int (*create_arch_files)(struct super_block * sb, struct dentry * root);
|
||||
|
||||
static int setup_ibs_files(struct super_block * sb, struct dentry * root)
|
||||
{
|
||||
char buf[12];
|
||||
struct dentry *dir;
|
||||
int ret = 0;
|
||||
|
||||
/* architecture specific files */
|
||||
if (create_arch_files)
|
||||
ret = create_arch_files(sb, root);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!ibs_allowed)
|
||||
return ret;
|
||||
|
||||
/* model specific files */
|
||||
|
||||
/* setup some reasonable defaults */
|
||||
ibs_config.max_cnt_fetch = 250000;
|
||||
ibs_config.fetch_enabled = 0;
|
||||
ibs_config.max_cnt_op = 250000;
|
||||
ibs_config.op_enabled = 0;
|
||||
ibs_config.dispatched_ops = 1;
|
||||
snprintf(buf, sizeof(buf), "ibs_fetch");
|
||||
dir = oprofilefs_mkdir(sb, root, buf);
|
||||
oprofilefs_create_ulong(sb, dir, "rand_enable",
|
||||
&ibs_config.rand_en);
|
||||
oprofilefs_create_ulong(sb, dir, "enable",
|
||||
&ibs_config.fetch_enabled);
|
||||
oprofilefs_create_ulong(sb, dir, "max_count",
|
||||
&ibs_config.max_cnt_fetch);
|
||||
snprintf(buf, sizeof(buf), "ibs_uops");
|
||||
dir = oprofilefs_mkdir(sb, root, buf);
|
||||
oprofilefs_create_ulong(sb, dir, "enable",
|
||||
&ibs_config.op_enabled);
|
||||
oprofilefs_create_ulong(sb, dir, "max_count",
|
||||
&ibs_config.max_cnt_op);
|
||||
oprofilefs_create_ulong(sb, dir, "dispatched_ops",
|
||||
&ibs_config.dispatched_ops);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int op_amd_init(struct oprofile_operations *ops)
|
||||
{
|
||||
setup_ibs();
|
||||
create_arch_files = ops->create_files;
|
||||
ops->create_files = setup_ibs_files;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Model exit hook (IBS build): undo the per-CPU IBS APIC setup. */
static void op_amd_exit(void)
{
	clear_ibs_nmi();
}
|
||||
|
||||
#endif
|
||||
|
||||
struct op_x86_model_spec const op_amd_spec = {
|
||||
.init = op_amd_init,
|
||||
.exit = op_amd_exit,
|
||||
.num_counters = NUM_COUNTERS,
|
||||
.num_controls = NUM_CONTROLS,
|
||||
.fill_in_addresses = &op_amd_fill_in_addresses,
|
||||
.setup_ctrs = &op_amd_setup_ctrs,
|
||||
.check_ctrs = &op_amd_check_ctrs,
|
||||
.start = &op_amd_start,
|
||||
.stop = &op_amd_stop,
|
||||
.shutdown = &op_amd_shutdown
|
||||
};
|
|
@ -1,190 +0,0 @@
|
|||
/*
|
||||
* @file op_model_athlon.h
|
||||
* athlon / K7 / K8 / Family 10h model-specific MSR operations
|
||||
*
|
||||
* @remark Copyright 2002 OProfile authors
|
||||
* @remark Read the file COPYING
|
||||
*
|
||||
* @author John Levon
|
||||
* @author Philippe Elie
|
||||
* @author Graydon Hoare
|
||||
*/
|
||||
|
||||
#include <linux/oprofile.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/nmi.h>
|
||||
|
||||
#include "op_x86_model.h"
|
||||
#include "op_counter.h"
|
||||
|
||||
/* Number of counter and control (event select) MSRs handled by this model */
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4

/*
 * Counter MSR accessors.  A counter whose recorded address is 0 was not
 * reserved.  CTR_WRITE() stores the negated count with the upper word set
 * to all ones; CTR_OVERFLOWED() reports true once bit 31 of the low word
 * is clear.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. "a | b") are evaluated as a unit.
 */
#define CTR_IS_RESERVED(msrs, c) ((msrs)->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l, h, msrs, c) do {rdmsr((msrs)->counters[(c)].addr, (l), (h)); } while (0)
#define CTR_WRITE(l, msrs, c) do {wrmsr((msrs)->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))

/* Control (event select) MSR accessors; address 0 again means "not reserved" */
#define CTRL_IS_RESERVED(msrs, c) ((msrs)->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs)->controls[(c)].addr, (l), (h)); } while (0)
#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs)->controls[(c)].addr, (l), (h)); } while (0)

/* Bit 22 of the low word starts/stops counting */
#define CTRL_SET_ACTIVE(n) ((n) |= (1<<22))
#define CTRL_SET_INACTIVE(n) ((n) &= ~(1<<22))
/* Keep only bit 21 of the low word; everything else is reprogrammed */
#define CTRL_CLEAR_LO(x) ((x) &= (1<<21))
/* Clear bits 0-3 and 8-9 of the high word (the fields set below) */
#define CTRL_CLEAR_HI(x) ((x) &= 0xfffffcf0)
#define CTRL_SET_ENABLE(val) ((val) |= 1<<20)
#define CTRL_SET_USR(val, u) ((val) |= (((u) & 1) << 16))
#define CTRL_SET_KERN(val, k) ((val) |= (((k) & 1) << 17))
#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))
#define CTRL_SET_EVENT_LOW(val, e) ((val) |= ((e) & 0xff))
#define CTRL_SET_EVENT_HIGH(val, e) ((val) |= (((e) >> 8) & 0xf))
#define CTRL_SET_HOST_ONLY(val, h) ((val) |= (((h) & 1) << 9))
#define CTRL_SET_GUEST_ONLY(val, h) ((val) |= (((h) & 1) << 8))

/* Per-counter reload value; 0 marks a counter that is not in use */
static unsigned long reset_value[NUM_COUNTERS];
|
||||
|
||||
static void athlon_fill_in_addresses(struct op_msrs * const msrs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_COUNTERS; i++) {
|
||||
if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
|
||||
msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
|
||||
else
|
||||
msrs->counters[i].addr = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM_CONTROLS; i++) {
|
||||
if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
|
||||
msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
|
||||
else
|
||||
msrs->controls[i].addr = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void athlon_setup_ctrs(struct op_msrs const * const msrs)
|
||||
{
|
||||
unsigned int low, high;
|
||||
int i;
|
||||
|
||||
/* clear all counters */
|
||||
for (i = 0 ; i < NUM_CONTROLS; ++i) {
|
||||
if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
|
||||
continue;
|
||||
CTRL_READ(low, high, msrs, i);
|
||||
CTRL_CLEAR_LO(low);
|
||||
CTRL_CLEAR_HI(high);
|
||||
CTRL_WRITE(low, high, msrs, i);
|
||||
}
|
||||
|
||||
/* avoid a false detection of ctr overflows in NMI handler */
|
||||
for (i = 0; i < NUM_COUNTERS; ++i) {
|
||||
if (unlikely(!CTR_IS_RESERVED(msrs, i)))
|
||||
continue;
|
||||
CTR_WRITE(1, msrs, i);
|
||||
}
|
||||
|
||||
/* enable active counters */
|
||||
for (i = 0; i < NUM_COUNTERS; ++i) {
|
||||
if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
|
||||
reset_value[i] = counter_config[i].count;
|
||||
|
||||
CTR_WRITE(counter_config[i].count, msrs, i);
|
||||
|
||||
CTRL_READ(low, high, msrs, i);
|
||||
CTRL_CLEAR_LO(low);
|
||||
CTRL_CLEAR_HI(high);
|
||||
CTRL_SET_ENABLE(low);
|
||||
CTRL_SET_USR(low, counter_config[i].user);
|
||||
CTRL_SET_KERN(low, counter_config[i].kernel);
|
||||
CTRL_SET_UM(low, counter_config[i].unit_mask);
|
||||
CTRL_SET_EVENT_LOW(low, counter_config[i].event);
|
||||
CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
|
||||
CTRL_SET_HOST_ONLY(high, 0);
|
||||
CTRL_SET_GUEST_ONLY(high, 0);
|
||||
|
||||
CTRL_WRITE(low, high, msrs, i);
|
||||
} else {
|
||||
reset_value[i] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int athlon_check_ctrs(struct pt_regs * const regs,
|
||||
struct op_msrs const * const msrs)
|
||||
{
|
||||
unsigned int low, high;
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < NUM_COUNTERS; ++i) {
|
||||
if (!reset_value[i])
|
||||
continue;
|
||||
CTR_READ(low, high, msrs, i);
|
||||
if (CTR_OVERFLOWED(low)) {
|
||||
oprofile_add_sample(regs, i);
|
||||
CTR_WRITE(reset_value[i], msrs, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* See op_model_ppro.c */
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
static void athlon_start(struct op_msrs const * const msrs)
|
||||
{
|
||||
unsigned int low, high;
|
||||
int i;
|
||||
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
|
||||
if (reset_value[i]) {
|
||||
CTRL_READ(low, high, msrs, i);
|
||||
CTRL_SET_ACTIVE(low);
|
||||
CTRL_WRITE(low, high, msrs, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void athlon_stop(struct op_msrs const * const msrs)
|
||||
{
|
||||
unsigned int low, high;
|
||||
int i;
|
||||
|
||||
/* Subtle: stop on all counters to avoid race with
|
||||
* setting our pm callback */
|
||||
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
|
||||
if (!reset_value[i])
|
||||
continue;
|
||||
CTRL_READ(low, high, msrs, i);
|
||||
CTRL_SET_INACTIVE(low);
|
||||
CTRL_WRITE(low, high, msrs, i);
|
||||
}
|
||||
}
|
||||
|
||||
static void athlon_shutdown(struct op_msrs const * const msrs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
|
||||
if (CTR_IS_RESERVED(msrs, i))
|
||||
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
|
||||
}
|
||||
for (i = 0 ; i < NUM_CONTROLS ; ++i) {
|
||||
if (CTRL_IS_RESERVED(msrs, i))
|
||||
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
|
||||
}
|
||||
}
|
||||
|
||||
struct op_x86_model_spec const op_athlon_spec = {
|
||||
.num_counters = NUM_COUNTERS,
|
||||
.num_controls = NUM_CONTROLS,
|
||||
.fill_in_addresses = &athlon_fill_in_addresses,
|
||||
.setup_ctrs = &athlon_setup_ctrs,
|
||||
.check_ctrs = &athlon_check_ctrs,
|
||||
.start = &athlon_start,
|
||||
.stop = &athlon_stop,
|
||||
.shutdown = &athlon_shutdown
|
||||
};
|
|
@ -32,6 +32,8 @@ struct pt_regs;
|
|||
* various x86 CPU models' perfctr support.
|
||||
*/
|
||||
struct op_x86_model_spec {
|
||||
int (*init)(struct oprofile_operations *ops);
|
||||
void (*exit)(void);
|
||||
unsigned int const num_counters;
|
||||
unsigned int const num_controls;
|
||||
void (*fill_in_addresses)(struct op_msrs * const msrs);
|
||||
|
@ -46,6 +48,6 @@ struct op_x86_model_spec {
|
|||
extern struct op_x86_model_spec const op_ppro_spec;
|
||||
extern struct op_x86_model_spec const op_p4_spec;
|
||||
extern struct op_x86_model_spec const op_p4_ht2_spec;
|
||||
extern struct op_x86_model_spec const op_athlon_spec;
|
||||
extern struct op_x86_model_spec const op_amd_spec;
|
||||
|
||||
#endif /* OP_X86_MODEL_H */
|
||||
|
|
|
@ -511,3 +511,31 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size);
|
|||
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size);
|
||||
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size);
|
||||
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size);
|
||||
|
||||
/*
|
||||
* SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
|
||||
* confusing the PCI engine:
|
||||
*/
|
||||
static void sb600_disable_hpet_bar(struct pci_dev *dev)
|
||||
{
|
||||
u8 val;
|
||||
|
||||
/*
|
||||
* The SB600 and SB700 both share the same device
|
||||
* ID, but the PM register 0x55 does something different
|
||||
* for the SB700, so make sure we are dealing with the
|
||||
* SB600 before touching the bit:
|
||||
*/
|
||||
|
||||
pci_read_config_byte(dev, 0x08, &val);
|
||||
|
||||
if (val < 0x2F) {
|
||||
outb(0x55, 0xCD6);
|
||||
val = inb(0xCD7);
|
||||
|
||||
/* Set bit 7 in PM register 0x55 */
|
||||
outb(0x55, 0xCD6);
|
||||
outb(val | 0x80, 0xCD7);
|
||||
}
|
||||
}
|
||||
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
|
||||
|
|
|
@ -53,6 +53,11 @@
|
|||
|
||||
#define HPET_RANGE_SIZE 1024 /* from HPET spec */
|
||||
|
||||
|
||||
/* WARNING -- don't get confused. These macros are never used
|
||||
* to write the (single) counter, and rarely to read it.
|
||||
* They're badly named; to fix, someday.
|
||||
*/
|
||||
#if BITS_PER_LONG == 64
|
||||
#define write_counter(V, MC) writeq(V, MC)
|
||||
#define read_counter(MC) readq(MC)
|
||||
|
@ -77,7 +82,7 @@ static struct clocksource clocksource_hpet = {
|
|||
.rating = 250,
|
||||
.read = read_hpet,
|
||||
.mask = CLOCKSOURCE_MASK(64),
|
||||
.mult = 0, /*to be caluclated*/
|
||||
.mult = 0, /* to be calculated */
|
||||
.shift = 10,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
@ -86,8 +91,6 @@ static struct clocksource *hpet_clocksource;
|
|||
|
||||
/* A lock for concurrent access by app and isr hpet activity. */
|
||||
static DEFINE_SPINLOCK(hpet_lock);
|
||||
/* A lock for concurrent intermodule access to hpet and isr hpet activity. */
|
||||
static DEFINE_SPINLOCK(hpet_task_lock);
|
||||
|
||||
#define HPET_DEV_NAME (7)
|
||||
|
||||
|
@ -99,7 +102,6 @@ struct hpet_dev {
|
|||
unsigned long hd_irqdata;
|
||||
wait_queue_head_t hd_waitqueue;
|
||||
struct fasync_struct *hd_async_queue;
|
||||
struct hpet_task *hd_task;
|
||||
unsigned int hd_flags;
|
||||
unsigned int hd_irq;
|
||||
unsigned int hd_hdwirq;
|
||||
|
@ -173,11 +175,6 @@ static irqreturn_t hpet_interrupt(int irq, void *data)
|
|||
writel(isr, &devp->hd_hpet->hpet_isr);
|
||||
spin_unlock(&hpet_lock);
|
||||
|
||||
spin_lock(&hpet_task_lock);
|
||||
if (devp->hd_task)
|
||||
devp->hd_task->ht_func(devp->hd_task->ht_data);
|
||||
spin_unlock(&hpet_task_lock);
|
||||
|
||||
wake_up_interruptible(&devp->hd_waitqueue);
|
||||
|
||||
kill_fasync(&devp->hd_async_queue, SIGIO, POLL_IN);
|
||||
|
@ -185,6 +182,67 @@ static irqreturn_t hpet_interrupt(int irq, void *data)
|
|||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static void hpet_timer_set_irq(struct hpet_dev *devp)
|
||||
{
|
||||
unsigned long v;
|
||||
int irq, gsi;
|
||||
struct hpet_timer __iomem *timer;
|
||||
|
||||
spin_lock_irq(&hpet_lock);
|
||||
if (devp->hd_hdwirq) {
|
||||
spin_unlock_irq(&hpet_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
timer = devp->hd_timer;
|
||||
|
||||
/* we prefer level triggered mode */
|
||||
v = readl(&timer->hpet_config);
|
||||
if (!(v & Tn_INT_TYPE_CNF_MASK)) {
|
||||
v |= Tn_INT_TYPE_CNF_MASK;
|
||||
writel(v, &timer->hpet_config);
|
||||
}
|
||||
spin_unlock_irq(&hpet_lock);
|
||||
|
||||
v = (readq(&timer->hpet_config) & Tn_INT_ROUTE_CAP_MASK) >>
|
||||
Tn_INT_ROUTE_CAP_SHIFT;
|
||||
|
||||
/*
|
||||
* In PIC mode, skip IRQ0-4, IRQ6-9, IRQ12-15 which is always used by
|
||||
* legacy device. In IO APIC mode, we skip all the legacy IRQS.
|
||||
*/
|
||||
if (acpi_irq_model == ACPI_IRQ_MODEL_PIC)
|
||||
v &= ~0xf3df;
|
||||
else
|
||||
v &= ~0xffff;
|
||||
|
||||
for (irq = find_first_bit(&v, HPET_MAX_IRQ); irq < HPET_MAX_IRQ;
|
||||
irq = find_next_bit(&v, HPET_MAX_IRQ, 1 + irq)) {
|
||||
|
||||
if (irq >= NR_IRQS) {
|
||||
irq = HPET_MAX_IRQ;
|
||||
break;
|
||||
}
|
||||
|
||||
gsi = acpi_register_gsi(irq, ACPI_LEVEL_SENSITIVE,
|
||||
ACPI_ACTIVE_LOW);
|
||||
if (gsi > 0)
|
||||
break;
|
||||
|
||||
/* FIXME: Setup interrupt source table */
|
||||
}
|
||||
|
||||
if (irq < HPET_MAX_IRQ) {
|
||||
spin_lock_irq(&hpet_lock);
|
||||
v = readl(&timer->hpet_config);
|
||||
v |= irq << Tn_INT_ROUTE_CNF_SHIFT;
|
||||
writel(v, &timer->hpet_config);
|
||||
devp->hd_hdwirq = gsi;
|
||||
spin_unlock_irq(&hpet_lock);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static int hpet_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct hpet_dev *devp;
|
||||
|
@ -199,8 +257,7 @@ static int hpet_open(struct inode *inode, struct file *file)
|
|||
|
||||
for (devp = NULL, hpetp = hpets; hpetp && !devp; hpetp = hpetp->hp_next)
|
||||
for (i = 0; i < hpetp->hp_ntimer; i++)
|
||||
if (hpetp->hp_dev[i].hd_flags & HPET_OPEN
|
||||
|| hpetp->hp_dev[i].hd_task)
|
||||
if (hpetp->hp_dev[i].hd_flags & HPET_OPEN)
|
||||
continue;
|
||||
else {
|
||||
devp = &hpetp->hp_dev[i];
|
||||
|
@ -219,6 +276,8 @@ static int hpet_open(struct inode *inode, struct file *file)
|
|||
spin_unlock_irq(&hpet_lock);
|
||||
unlock_kernel();
|
||||
|
||||
hpet_timer_set_irq(devp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -441,7 +500,11 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp)
|
|||
devp->hd_irq = irq;
|
||||
t = devp->hd_ireqfreq;
|
||||
v = readq(&timer->hpet_config);
|
||||
g = v | Tn_INT_ENB_CNF_MASK;
|
||||
|
||||
/* 64-bit comparators are not yet supported through the ioctls,
|
||||
* so force this into 32-bit mode if it supports both modes
|
||||
*/
|
||||
g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK;
|
||||
|
||||
if (devp->hd_flags & HPET_PERIODIC) {
|
||||
write_counter(t, &timer->hpet_compare);
|
||||
|
@ -451,6 +514,12 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp)
|
|||
v |= Tn_VAL_SET_CNF_MASK;
|
||||
writeq(v, &timer->hpet_config);
|
||||
local_irq_save(flags);
|
||||
|
||||
/* NOTE: what we modify here is a hidden accumulator
|
||||
* register supported by periodic-capable comparators.
|
||||
* We never want to modify the (single) counter; that
|
||||
* would affect all the comparators.
|
||||
*/
|
||||
m = read_counter(&hpet->hpet_mc);
|
||||
write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare);
|
||||
} else {
|
||||
|
@ -604,57 +673,6 @@ static int hpet_is_known(struct hpet_data *hdp)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static inline int hpet_tpcheck(struct hpet_task *tp)
|
||||
{
|
||||
struct hpet_dev *devp;
|
||||
struct hpets *hpetp;
|
||||
|
||||
devp = tp->ht_opaque;
|
||||
|
||||
if (!devp)
|
||||
return -ENXIO;
|
||||
|
||||
for (hpetp = hpets; hpetp; hpetp = hpetp->hp_next)
|
||||
if (devp >= hpetp->hp_dev
|
||||
&& devp < (hpetp->hp_dev + hpetp->hp_ntimer)
|
||||
&& devp->hd_hpet == hpetp->hp_hpet)
|
||||
return 0;
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
#if 0
|
||||
int hpet_unregister(struct hpet_task *tp)
|
||||
{
|
||||
struct hpet_dev *devp;
|
||||
struct hpet_timer __iomem *timer;
|
||||
int err;
|
||||
|
||||
if ((err = hpet_tpcheck(tp)))
|
||||
return err;
|
||||
|
||||
spin_lock_irq(&hpet_task_lock);
|
||||
spin_lock(&hpet_lock);
|
||||
|
||||
devp = tp->ht_opaque;
|
||||
if (devp->hd_task != tp) {
|
||||
spin_unlock(&hpet_lock);
|
||||
spin_unlock_irq(&hpet_task_lock);
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
timer = devp->hd_timer;
|
||||
writeq((readq(&timer->hpet_config) & ~Tn_INT_ENB_CNF_MASK),
|
||||
&timer->hpet_config);
|
||||
devp->hd_flags &= ~(HPET_IE | HPET_PERIODIC);
|
||||
devp->hd_task = NULL;
|
||||
spin_unlock(&hpet_lock);
|
||||
spin_unlock_irq(&hpet_task_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* 0 */
|
||||
|
||||
static ctl_table hpet_table[] = {
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
|
@ -746,6 +764,7 @@ int hpet_alloc(struct hpet_data *hdp)
|
|||
static struct hpets *last = NULL;
|
||||
unsigned long period;
|
||||
unsigned long long temp;
|
||||
u32 remainder;
|
||||
|
||||
/*
|
||||
* hpet_alloc can be called by platform dependent code.
|
||||
|
@ -809,9 +828,13 @@ int hpet_alloc(struct hpet_data *hdp)
|
|||
printk("%s %d", i > 0 ? "," : "", hdp->hd_irq[i]);
|
||||
printk("\n");
|
||||
|
||||
printk(KERN_INFO "hpet%u: %u %d-bit timers, %Lu Hz\n",
|
||||
hpetp->hp_which, hpetp->hp_ntimer,
|
||||
cap & HPET_COUNTER_SIZE_MASK ? 64 : 32, hpetp->hp_tick_freq);
|
||||
temp = hpetp->hp_tick_freq;
|
||||
remainder = do_div(temp, 1000000);
|
||||
printk(KERN_INFO
|
||||
"hpet%u: %u comparators, %d-bit %u.%06u MHz counter\n",
|
||||
hpetp->hp_which, hpetp->hp_ntimer,
|
||||
cap & HPET_COUNTER_SIZE_MASK ? 64 : 32,
|
||||
(unsigned) temp, remainder);
|
||||
|
||||
mcfg = readq(&hpet->hpet_config);
|
||||
if ((mcfg & HPET_ENABLE_CNF_MASK) == 0) {
|
||||
|
@ -874,8 +897,6 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
|
|||
hdp->hd_address = ioremap(addr.minimum, addr.address_length);
|
||||
|
||||
if (hpet_is_known(hdp)) {
|
||||
printk(KERN_DEBUG "%s: 0x%lx is busy\n",
|
||||
__func__, hdp->hd_phys_address);
|
||||
iounmap(hdp->hd_address);
|
||||
return AE_ALREADY_EXISTS;
|
||||
}
|
||||
|
@ -891,8 +912,6 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
|
|||
HPET_RANGE_SIZE);
|
||||
|
||||
if (hpet_is_known(hdp)) {
|
||||
printk(KERN_DEBUG "%s: 0x%lx is busy\n",
|
||||
__func__, hdp->hd_phys_address);
|
||||
iounmap(hdp->hd_address);
|
||||
return AE_ALREADY_EXISTS;
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
* @remark Read the file COPYING
|
||||
*
|
||||
* @author John Levon <levon@movementarian.org>
|
||||
* @author Barry Kasindorf
|
||||
*
|
||||
* This is the core of the buffer management. Each
|
||||
* CPU buffer is processed and entered into the
|
||||
|
@ -33,7 +34,7 @@
|
|||
#include "event_buffer.h"
|
||||
#include "cpu_buffer.h"
|
||||
#include "buffer_sync.h"
|
||||
|
||||
|
||||
static LIST_HEAD(dying_tasks);
|
||||
static LIST_HEAD(dead_tasks);
|
||||
static cpumask_t marked_cpus = CPU_MASK_NONE;
|
||||
|
@ -48,10 +49,11 @@ static void process_task_mortuary(void);
|
|||
* Can be invoked from softirq via RCU callback due to
|
||||
* call_rcu() of the task struct, hence the _irqsave.
|
||||
*/
|
||||
static int task_free_notify(struct notifier_block * self, unsigned long val, void * data)
|
||||
static int
|
||||
task_free_notify(struct notifier_block *self, unsigned long val, void *data)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct task_struct * task = data;
|
||||
struct task_struct *task = data;
|
||||
spin_lock_irqsave(&task_mortuary, flags);
|
||||
list_add(&task->tasks, &dying_tasks);
|
||||
spin_unlock_irqrestore(&task_mortuary, flags);
|
||||
|
@ -62,13 +64,14 @@ static int task_free_notify(struct notifier_block * self, unsigned long val, voi
|
|||
/* The task is on its way out. A sync of the buffer means we can catch
|
||||
* any remaining samples for this task.
|
||||
*/
|
||||
static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data)
|
||||
static int
|
||||
task_exit_notify(struct notifier_block *self, unsigned long val, void *data)
|
||||
{
|
||||
/* To avoid latency problems, we only process the current CPU,
|
||||
* hoping that most samples for the task are on this CPU
|
||||
*/
|
||||
sync_buffer(raw_smp_processor_id());
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -77,11 +80,12 @@ static int task_exit_notify(struct notifier_block * self, unsigned long val, voi
|
|||
* we don't lose any. This does not have to be exact, it's a QoI issue
|
||||
* only.
|
||||
*/
|
||||
static int munmap_notify(struct notifier_block * self, unsigned long val, void * data)
|
||||
static int
|
||||
munmap_notify(struct notifier_block *self, unsigned long val, void *data)
|
||||
{
|
||||
unsigned long addr = (unsigned long)data;
|
||||
struct mm_struct * mm = current->mm;
|
||||
struct vm_area_struct * mpnt;
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct *mpnt;
|
||||
|
||||
down_read(&mm->mmap_sem);
|
||||
|
||||
|
@ -99,11 +103,12 @@ static int munmap_notify(struct notifier_block * self, unsigned long val, void *
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* We need to be told about new modules so we don't attribute to a previously
|
||||
* loaded module, or drop the samples on the floor.
|
||||
*/
|
||||
static int module_load_notify(struct notifier_block * self, unsigned long val, void * data)
|
||||
static int
|
||||
module_load_notify(struct notifier_block *self, unsigned long val, void *data)
|
||||
{
|
||||
#ifdef CONFIG_MODULES
|
||||
if (val != MODULE_STATE_COMING)
|
||||
|
@ -118,7 +123,7 @@ static int module_load_notify(struct notifier_block * self, unsigned long val, v
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static struct notifier_block task_free_nb = {
|
||||
.notifier_call = task_free_notify,
|
||||
};
|
||||
|
@ -135,7 +140,7 @@ static struct notifier_block module_load_nb = {
|
|||
.notifier_call = module_load_notify,
|
||||
};
|
||||
|
||||
|
||||
|
||||
static void end_sync(void)
|
||||
{
|
||||
end_cpu_work();
|
||||
|
@ -208,14 +213,14 @@ static inline unsigned long fast_get_dcookie(struct path *path)
|
|||
* not strictly necessary but allows oprofile to associate
|
||||
* shared-library samples with particular applications
|
||||
*/
|
||||
static unsigned long get_exec_dcookie(struct mm_struct * mm)
|
||||
static unsigned long get_exec_dcookie(struct mm_struct *mm)
|
||||
{
|
||||
unsigned long cookie = NO_COOKIE;
|
||||
struct vm_area_struct * vma;
|
||||
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
if (!mm)
|
||||
goto out;
|
||||
|
||||
|
||||
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
||||
if (!vma->vm_file)
|
||||
continue;
|
||||
|
@ -235,13 +240,14 @@ static unsigned long get_exec_dcookie(struct mm_struct * mm)
|
|||
* sure to do this lookup before a mm->mmap modification happens so
|
||||
* we don't lose track.
|
||||
*/
|
||||
static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset)
|
||||
static unsigned long
|
||||
lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset)
|
||||
{
|
||||
unsigned long cookie = NO_COOKIE;
|
||||
struct vm_area_struct * vma;
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
|
||||
|
||||
|
||||
if (addr < vma->vm_start || addr >= vma->vm_end)
|
||||
continue;
|
||||
|
||||
|
@ -263,9 +269,20 @@ static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, o
|
|||
return cookie;
|
||||
}
|
||||
|
||||
static void increment_tail(struct oprofile_cpu_buffer *b)
|
||||
{
|
||||
unsigned long new_tail = b->tail_pos + 1;
|
||||
|
||||
rmb(); /* be sure fifo pointers are synchromized */
|
||||
|
||||
if (new_tail < b->buffer_size)
|
||||
b->tail_pos = new_tail;
|
||||
else
|
||||
b->tail_pos = 0;
|
||||
}
|
||||
|
||||
static unsigned long last_cookie = INVALID_COOKIE;
|
||||
|
||||
|
||||
static void add_cpu_switch(int i)
|
||||
{
|
||||
add_event_entry(ESCAPE_CODE);
|
||||
|
@ -278,16 +295,16 @@ static void add_kernel_ctx_switch(unsigned int in_kernel)
|
|||
{
|
||||
add_event_entry(ESCAPE_CODE);
|
||||
if (in_kernel)
|
||||
add_event_entry(KERNEL_ENTER_SWITCH_CODE);
|
||||
add_event_entry(KERNEL_ENTER_SWITCH_CODE);
|
||||
else
|
||||
add_event_entry(KERNEL_EXIT_SWITCH_CODE);
|
||||
add_event_entry(KERNEL_EXIT_SWITCH_CODE);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
|
||||
add_user_ctx_switch(struct task_struct const *task, unsigned long cookie)
|
||||
{
|
||||
add_event_entry(ESCAPE_CODE);
|
||||
add_event_entry(CTX_SWITCH_CODE);
|
||||
add_event_entry(CTX_SWITCH_CODE);
|
||||
add_event_entry(task->pid);
|
||||
add_event_entry(cookie);
|
||||
/* Another code for daemon back-compat */
|
||||
|
@ -296,7 +313,7 @@ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
|
|||
add_event_entry(task->tgid);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void add_cookie_switch(unsigned long cookie)
|
||||
{
|
||||
add_event_entry(ESCAPE_CODE);
|
||||
|
@ -304,13 +321,78 @@ static void add_cookie_switch(unsigned long cookie)
|
|||
add_event_entry(cookie);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void add_trace_begin(void)
|
||||
{
|
||||
add_event_entry(ESCAPE_CODE);
|
||||
add_event_entry(TRACE_BEGIN_CODE);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_OPROFILE_IBS
|
||||
|
||||
#define IBS_FETCH_CODE_SIZE 2
|
||||
#define IBS_OP_CODE_SIZE 5
|
||||
#define IBS_EIP(offset) \
|
||||
(((struct op_sample *)&cpu_buf->buffer[(offset)])->eip)
|
||||
#define IBS_EVENT(offset) \
|
||||
(((struct op_sample *)&cpu_buf->buffer[(offset)])->event)
|
||||
|
||||
/*
|
||||
* Add IBS fetch and op entries to event buffer
|
||||
*/
|
||||
static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
|
||||
int in_kernel, struct mm_struct *mm)
|
||||
{
|
||||
unsigned long rip;
|
||||
int i, count;
|
||||
unsigned long ibs_cookie = 0;
|
||||
off_t offset;
|
||||
|
||||
increment_tail(cpu_buf); /* move to RIP entry */
|
||||
|
||||
rip = IBS_EIP(cpu_buf->tail_pos);
|
||||
|
||||
#ifdef __LP64__
|
||||
rip += IBS_EVENT(cpu_buf->tail_pos) << 32;
|
||||
#endif
|
||||
|
||||
if (mm) {
|
||||
ibs_cookie = lookup_dcookie(mm, rip, &offset);
|
||||
|
||||
if (ibs_cookie == NO_COOKIE)
|
||||
offset = rip;
|
||||
if (ibs_cookie == INVALID_COOKIE) {
|
||||
atomic_inc(&oprofile_stats.sample_lost_no_mapping);
|
||||
offset = rip;
|
||||
}
|
||||
if (ibs_cookie != last_cookie) {
|
||||
add_cookie_switch(ibs_cookie);
|
||||
last_cookie = ibs_cookie;
|
||||
}
|
||||
} else
|
||||
offset = rip;
|
||||
|
||||
add_event_entry(ESCAPE_CODE);
|
||||
add_event_entry(code);
|
||||
add_event_entry(offset); /* Offset from Dcookie */
|
||||
|
||||
/* we send the Dcookie offset, but send the raw Linear Add also*/
|
||||
add_event_entry(IBS_EIP(cpu_buf->tail_pos));
|
||||
add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
|
||||
|
||||
if (code == IBS_FETCH_CODE)
|
||||
count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/
|
||||
else
|
||||
count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
increment_tail(cpu_buf);
|
||||
add_event_entry(IBS_EIP(cpu_buf->tail_pos));
|
||||
add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void add_sample_entry(unsigned long offset, unsigned long event)
|
||||
{
|
||||
|
@ -319,13 +401,13 @@ static void add_sample_entry(unsigned long offset, unsigned long event)
|
|||
}
|
||||
|
||||
|
||||
static int add_us_sample(struct mm_struct * mm, struct op_sample * s)
|
||||
static int add_us_sample(struct mm_struct *mm, struct op_sample *s)
|
||||
{
|
||||
unsigned long cookie;
|
||||
off_t offset;
|
||||
|
||||
cookie = lookup_dcookie(mm, s->eip, &offset);
|
||||
|
||||
|
||||
cookie = lookup_dcookie(mm, s->eip, &offset);
|
||||
|
||||
if (cookie == INVALID_COOKIE) {
|
||||
atomic_inc(&oprofile_stats.sample_lost_no_mapping);
|
||||
return 0;
|
||||
|
@ -341,13 +423,13 @@ static int add_us_sample(struct mm_struct * mm, struct op_sample * s)
|
|||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Add a sample to the global event buffer. If possible the
|
||||
* sample is converted into a persistent dentry/offset pair
|
||||
* for later lookup from userspace.
|
||||
*/
|
||||
static int
|
||||
add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
|
||||
add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
|
||||
{
|
||||
if (in_kernel) {
|
||||
add_sample_entry(s->eip, s->event);
|
||||
|
@ -359,9 +441,9 @@ add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void release_mm(struct mm_struct * mm)
|
||||
|
||||
static void release_mm(struct mm_struct *mm)
|
||||
{
|
||||
if (!mm)
|
||||
return;
|
||||
|
@ -370,9 +452,9 @@ static void release_mm(struct mm_struct * mm)
|
|||
}
|
||||
|
||||
|
||||
static struct mm_struct * take_tasks_mm(struct task_struct * task)
|
||||
static struct mm_struct *take_tasks_mm(struct task_struct *task)
|
||||
{
|
||||
struct mm_struct * mm = get_task_mm(task);
|
||||
struct mm_struct *mm = get_task_mm(task);
|
||||
if (mm)
|
||||
down_read(&mm->mmap_sem);
|
||||
return mm;
|
||||
|
@ -383,10 +465,10 @@ static inline int is_code(unsigned long val)
|
|||
{
|
||||
return val == ESCAPE_CODE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* "acquire" as many cpu buffer slots as we can */
|
||||
static unsigned long get_slots(struct oprofile_cpu_buffer * b)
|
||||
static unsigned long get_slots(struct oprofile_cpu_buffer *b)
|
||||
{
|
||||
unsigned long head = b->head_pos;
|
||||
unsigned long tail = b->tail_pos;
|
||||
|
@ -412,19 +494,6 @@ static unsigned long get_slots(struct oprofile_cpu_buffer * b)
|
|||
}
|
||||
|
||||
|
||||
static void increment_tail(struct oprofile_cpu_buffer * b)
|
||||
{
|
||||
unsigned long new_tail = b->tail_pos + 1;
|
||||
|
||||
rmb();
|
||||
|
||||
if (new_tail < b->buffer_size)
|
||||
b->tail_pos = new_tail;
|
||||
else
|
||||
b->tail_pos = 0;
|
||||
}
|
||||
|
||||
|
||||
/* Move tasks along towards death. Any tasks on dead_tasks
|
||||
* will definitely have no remaining references in any
|
||||
* CPU buffers at this point, because we use two lists,
|
||||
|
@ -435,8 +504,8 @@ static void process_task_mortuary(void)
|
|||
{
|
||||
unsigned long flags;
|
||||
LIST_HEAD(local_dead_tasks);
|
||||
struct task_struct * task;
|
||||
struct task_struct * ttask;
|
||||
struct task_struct *task;
|
||||
struct task_struct *ttask;
|
||||
|
||||
spin_lock_irqsave(&task_mortuary, flags);
|
||||
|
||||
|
@ -493,7 +562,7 @@ void sync_buffer(int cpu)
|
|||
{
|
||||
struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu);
|
||||
struct mm_struct *mm = NULL;
|
||||
struct task_struct * new;
|
||||
struct task_struct *new;
|
||||
unsigned long cookie = 0;
|
||||
int in_kernel = 1;
|
||||
unsigned int i;
|
||||
|
@ -501,7 +570,7 @@ void sync_buffer(int cpu)
|
|||
unsigned long available;
|
||||
|
||||
mutex_lock(&buffer_mutex);
|
||||
|
||||
|
||||
add_cpu_switch(cpu);
|
||||
|
||||
/* Remember, only we can modify tail_pos */
|
||||
|
@ -509,8 +578,8 @@ void sync_buffer(int cpu)
|
|||
available = get_slots(cpu_buf);
|
||||
|
||||
for (i = 0; i < available; ++i) {
|
||||
struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
|
||||
|
||||
struct op_sample *s = &cpu_buf->buffer[cpu_buf->tail_pos];
|
||||
|
||||
if (is_code(s->eip)) {
|
||||
if (s->event <= CPU_IS_KERNEL) {
|
||||
/* kernel/userspace switch */
|
||||
|
@ -521,8 +590,18 @@ void sync_buffer(int cpu)
|
|||
} else if (s->event == CPU_TRACE_BEGIN) {
|
||||
state = sb_bt_start;
|
||||
add_trace_begin();
|
||||
#ifdef CONFIG_OPROFILE_IBS
|
||||
} else if (s->event == IBS_FETCH_BEGIN) {
|
||||
state = sb_bt_start;
|
||||
add_ibs_begin(cpu_buf,
|
||||
IBS_FETCH_CODE, in_kernel, mm);
|
||||
} else if (s->event == IBS_OP_BEGIN) {
|
||||
state = sb_bt_start;
|
||||
add_ibs_begin(cpu_buf,
|
||||
IBS_OP_CODE, in_kernel, mm);
|
||||
#endif
|
||||
} else {
|
||||
struct mm_struct * oldmm = mm;
|
||||
struct mm_struct *oldmm = mm;
|
||||
|
||||
/* userspace context switch */
|
||||
new = (struct task_struct *)s->event;
|
||||
|
@ -533,13 +612,11 @@ void sync_buffer(int cpu)
|
|||
cookie = get_exec_dcookie(mm);
|
||||
add_user_ctx_switch(new, cookie);
|
||||
}
|
||||
} else {
|
||||
if (state >= sb_bt_start &&
|
||||
!add_sample(mm, s, in_kernel)) {
|
||||
if (state == sb_bt_start) {
|
||||
state = sb_bt_ignore;
|
||||
atomic_inc(&oprofile_stats.bt_lost_no_mapping);
|
||||
}
|
||||
} else if (state >= sb_bt_start &&
|
||||
!add_sample(mm, s, in_kernel)) {
|
||||
if (state == sb_bt_start) {
|
||||
state = sb_bt_ignore;
|
||||
atomic_inc(&oprofile_stats.bt_lost_no_mapping);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
* @remark Read the file COPYING
|
||||
*
|
||||
* @author John Levon <levon@movementarian.org>
|
||||
* @author Barry Kasindorf <barry.kasindorf@amd.com>
|
||||
*
|
||||
* Each CPU has a local buffer that stores PC value/event
|
||||
* pairs. We also log context switches when we notice them.
|
||||
|
@ -209,7 +210,7 @@ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
|
||||
static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
|
||||
{
|
||||
if (nr_available_slots(cpu_buf) < 4) {
|
||||
cpu_buf->sample_lost_overflow++;
|
||||
|
@ -254,6 +255,75 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
|
|||
oprofile_add_ext_sample(pc, regs, event, is_kernel);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_OPROFILE_IBS
|
||||
|
||||
#define MAX_IBS_SAMPLE_SIZE 14
|
||||
static int log_ibs_sample(struct oprofile_cpu_buffer *cpu_buf,
|
||||
unsigned long pc, int is_kernel, unsigned int *ibs, int ibs_code)
|
||||
{
|
||||
struct task_struct *task;
|
||||
|
||||
cpu_buf->sample_received++;
|
||||
|
||||
if (nr_available_slots(cpu_buf) < MAX_IBS_SAMPLE_SIZE) {
|
||||
cpu_buf->sample_lost_overflow++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
is_kernel = !!is_kernel;
|
||||
|
||||
/* notice a switch from user->kernel or vice versa */
|
||||
if (cpu_buf->last_is_kernel != is_kernel) {
|
||||
cpu_buf->last_is_kernel = is_kernel;
|
||||
add_code(cpu_buf, is_kernel);
|
||||
}
|
||||
|
||||
/* notice a task switch */
|
||||
if (!is_kernel) {
|
||||
task = current;
|
||||
|
||||
if (cpu_buf->last_task != task) {
|
||||
cpu_buf->last_task = task;
|
||||
add_code(cpu_buf, (unsigned long)task);
|
||||
}
|
||||
}
|
||||
|
||||
add_code(cpu_buf, ibs_code);
|
||||
add_sample(cpu_buf, ibs[0], ibs[1]);
|
||||
add_sample(cpu_buf, ibs[2], ibs[3]);
|
||||
add_sample(cpu_buf, ibs[4], ibs[5]);
|
||||
|
||||
if (ibs_code == IBS_OP_BEGIN) {
|
||||
add_sample(cpu_buf, ibs[6], ibs[7]);
|
||||
add_sample(cpu_buf, ibs[8], ibs[9]);
|
||||
add_sample(cpu_buf, ibs[10], ibs[11]);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Log the IBS sample @ibs_sample, tagged with @code, into the current
 * CPU's buffer.  When backtrace_depth is non-zero and the sample was
 * logged successfully, a backtrace of that depth is recorded as well.
 */
void oprofile_add_ibs_sample(struct pt_regs *const regs,
				unsigned int * const ibs_sample, u8 code)
{
	int is_kernel = !user_mode(regs);
	unsigned long pc = profile_pc(regs);
	struct oprofile_cpu_buffer *cpu_buf =
			 &per_cpu(cpu_buffer, smp_processor_id());
	const int want_backtrace = backtrace_depth != 0;

	/*
	 * if log_ibs_sample() fails we can't backtrace since we lost the
	 * source of this event
	 */
	if (log_ibs_sample(cpu_buf, pc, is_kernel, ibs_sample, code) &&
	    want_backtrace)
		oprofile_ops.backtrace(regs, backtrace_depth);
}
|
||||
|
||||
#endif
|
||||
|
||||
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
|
||||
{
|
||||
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
|
||||
|
@ -296,7 +366,7 @@ static void wq_sync_buffer(struct work_struct *work)
|
|||
struct oprofile_cpu_buffer * b =
|
||||
container_of(work, struct oprofile_cpu_buffer, work.work);
|
||||
if (b->cpu != smp_processor_id()) {
|
||||
printk("WQ on CPU%d, prefer CPU%d\n",
|
||||
printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n",
|
||||
smp_processor_id(), b->cpu);
|
||||
}
|
||||
sync_buffer(b->cpu);
|
||||
|
|
|
@ -55,5 +55,7 @@ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
|
|||
/* transient events for the CPU buffer -> event buffer */
|
||||
#define CPU_IS_KERNEL 1
|
||||
#define CPU_TRACE_BEGIN 2
|
||||
#define IBS_FETCH_BEGIN 3
|
||||
#define IBS_OP_BEGIN 4
|
||||
|
||||
#endif /* OPROFILE_CPU_BUFFER_H */
|
||||
|
|
|
@ -37,6 +37,7 @@ struct hpet {
|
|||
#define hpet_compare _u1._hpet_compare
|
||||
|
||||
#define HPET_MAX_TIMERS (32)
|
||||
#define HPET_MAX_IRQ (32)
|
||||
|
||||
/*
|
||||
* HPET general capabilities register
|
||||
|
@ -64,7 +65,7 @@ struct hpet {
|
|||
*/
|
||||
|
||||
#define Tn_INT_ROUTE_CAP_MASK (0xffffffff00000000ULL)
|
||||
#define Tn_INI_ROUTE_CAP_SHIFT (32UL)
|
||||
#define Tn_INT_ROUTE_CAP_SHIFT (32UL)
|
||||
#define Tn_FSB_INT_DELCAP_MASK (0x8000UL)
|
||||
#define Tn_FSB_INT_DELCAP_SHIFT (15)
|
||||
#define Tn_FSB_EN_CNF_MASK (0x4000UL)
|
||||
|
@ -91,23 +92,14 @@ struct hpet {
|
|||
* exported interfaces
|
||||
*/
|
||||
|
||||
/* A callback and its argument, plus a driver-private back-pointer. */
struct hpet_task {
	void (*ht_func) (void *);	/* callback */
	void *ht_data;			/* argument passed to ht_func */
	void *ht_opaque;		/* driver-private; presumably the owning hpet_dev */
};
|
||||
|
||||
struct hpet_data {
|
||||
unsigned long hd_phys_address;
|
||||
void __iomem *hd_address;
|
||||
unsigned short hd_nirqs;
|
||||
unsigned short hd_flags;
|
||||
unsigned int hd_state; /* timer allocated */
|
||||
unsigned int hd_irq[HPET_MAX_TIMERS];
|
||||
};
|
||||
|
||||
#define HPET_DATA_PLATFORM 0x0001 /* platform call to hpet_alloc */
|
||||
|
||||
static inline void hpet_reserve_timer(struct hpet_data *hd, int timer)
|
||||
{
|
||||
hd->hd_state |= (1 << timer);
|
||||
|
@ -125,7 +117,7 @@ struct hpet_info {
|
|||
unsigned short hi_timer;
|
||||
};
|
||||
|
||||
#define HPET_INFO_PERIODIC 0x0001 /* timer is periodic */
|
||||
#define HPET_INFO_PERIODIC 0x0010 /* periodic-capable comparator */
|
||||
|
||||
#define HPET_IE_ON _IO('h', 0x01) /* interrupt on */
|
||||
#define HPET_IE_OFF _IO('h', 0x02) /* interrupt off */
|
||||
|
|
|
@ -36,6 +36,8 @@
|
|||
#define XEN_ENTER_SWITCH_CODE 10
|
||||
#define SPU_PROFILING_CODE 11
|
||||
#define SPU_CTX_SWITCH_CODE 12
|
||||
#define IBS_FETCH_CODE 13
|
||||
#define IBS_OP_CODE 14
|
||||
|
||||
struct super_block;
|
||||
struct dentry;
|
||||
|
|
Loading…
Reference in New Issue