diff --git a/MAINTAINERS b/MAINTAINERS index 33047a605438..13608bd2e791 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2887,6 +2887,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git S: Maintained F: drivers/input/ +INTEL IDLE DRIVER +M: Len Brown +L: linux-pm@lists.linux-foundation.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux-idle-2.6.git +S: Supported +F: drivers/idle/intel_idle.c + INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) M: Maik Broemme L: linux-fbdev@vger.kernel.org diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 62ba9400cc43..f0b6e5dbc5a0 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -239,8 +239,8 @@ static inline struct thread_info *current_thread_info(void) #define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ -#define TS_POLLING 0x0004 /* true if in idle loop - and not sleeping */ +#define TS_POLLING 0x0004 /* idle task polling need_resched, + skip sending interrupt */ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) diff --git a/drivers/Makefile b/drivers/Makefile index f42a03029b7c..91874e048552 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_PARISC) += parisc/ obj-$(CONFIG_RAPIDIO) += rapidio/ obj-y += video/ +obj-y += idle/ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_SFI) += sfi/ # PnP must come after ACPI since it will eventually need to check if acpi @@ -91,7 +92,6 @@ obj-$(CONFIG_EISA) += eisa/ obj-y += lguest/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_CPU_IDLE) += cpuidle/ -obj-y += idle/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_MEMSTICK) += memstick/ obj-$(CONFIG_NEW_LEDS) += leds/ diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index f169e516a1af..d269a8f3329c 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -172,13 +172,6 @@ static int power_saving_thread(void *data) do_sleep = 0; - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we test - * NEED_RESCHED: - */ - smp_mb(); - expire_time = jiffies + HZ * (100 - idle_pct) / 100; while (!need_resched()) { @@ -209,8 +202,6 @@ static int power_saving_thread(void *data) } } - current_thread_info()->status |= TS_POLLING; - /* * current sched_rt has threshold for rt task running time. * When a rt task uses 95% CPU time, the rt thread will be diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 5675d9747e87..b1034a9ada4e 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -616,7 +616,8 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) acpi_processor_get_limit_info(pr); - acpi_processor_power_init(pr, device); + if (cpuidle_get_driver() == &acpi_idle_driver) + acpi_processor_power_init(pr, device); pr->cdev = thermal_cooling_device_register("Processor", device, &processor_cooling_ops); @@ -920,9 +921,14 @@ static int __init acpi_processor_init(void) if (!acpi_processor_dir) return -ENOMEM; #endif - result = cpuidle_register_driver(&acpi_idle_driver); - if (result < 0) - goto out_proc; + + if (!cpuidle_register_driver(&acpi_idle_driver)) { + printk(KERN_DEBUG "ACPI: %s registered with cpuidle\n", + acpi_idle_driver.name); + } else { + printk(KERN_DEBUG "ACPI: acpi_idle yielding to %s", + cpuidle_get_driver()->name); + } result = acpi_bus_register_driver(&acpi_processor_driver); if (result < 0) @@ -941,7 +947,6 @@ static int __init acpi_processor_init(void) out_cpuidle: cpuidle_unregister_driver(&acpi_idle_driver); -out_proc: #ifdef CONFIG_ACPI_PROCFS remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir); #endif diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 13c6cb703f1d..2e8c27d48f2b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -872,6 +872,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, return(acpi_idle_enter_c1(dev, state)); local_irq_disable(); + if (cx->entry_method != ACPI_CSTATE_FFH) { current_thread_info()->status &= ~TS_POLLING; /* @@ -879,12 +880,12 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, * NEED_RESCHED: */ smp_mb(); - } - if (unlikely(need_resched())) { - current_thread_info()->status |= TS_POLLING; - local_irq_enable(); - return 0; + if (unlikely(need_resched())) { + current_thread_info()->status |= TS_POLLING; + local_irq_enable(); + return 0; + } } /* @@ -911,7 +912,8 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, sched_clock_idle_wakeup_event(idle_time_ns); local_irq_enable(); - current_thread_info()->status |= TS_POLLING; + if (cx->entry_method != ACPI_CSTATE_FFH) + current_thread_info()->status |= TS_POLLING; cx->usage++; @@ -962,6 +964,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, } local_irq_disable(); + if (cx->entry_method != ACPI_CSTATE_FFH) { current_thread_info()->status &= ~TS_POLLING; /* @@ -969,12 +972,12 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, * NEED_RESCHED: */ smp_mb(); - } - if (unlikely(need_resched())) { - current_thread_info()->status |= TS_POLLING; - local_irq_enable(); - return 0; + if (unlikely(need_resched())) { + current_thread_info()->status |= TS_POLLING; + local_irq_enable(); + return 0; + } } acpi_unlazy_tlb(smp_processor_id()); @@ -1028,7 +1031,8 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, sched_clock_idle_wakeup_event(idle_time_ns); local_irq_enable(); - current_thread_info()->status |= TS_POLLING; + if (cx->entry_method != ACPI_CSTATE_FFH) + current_thread_info()->status |= TS_POLLING; cx->usage++; diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 12fdd3987a36..199488576a05 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -156,7 +156,7 @@ int cpuidle_enable_device(struct cpuidle_device *dev) if (dev->enabled) return 0; - if (!cpuidle_curr_driver || !cpuidle_curr_governor) + if (!cpuidle_get_driver() || !cpuidle_curr_governor) return -EIO; if (!dev->state_count) return -EINVAL; @@ -207,7 +207,7 @@ void cpuidle_disable_device(struct cpuidle_device *dev) { if (!dev->enabled) return; - if (!cpuidle_curr_driver || !cpuidle_curr_governor) + if (!cpuidle_get_driver() || !cpuidle_curr_governor) return; dev->enabled = 0; @@ -271,10 +271,11 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) { int ret; struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu); + struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); if (!sys_dev) return -EINVAL; - if (!try_module_get(cpuidle_curr_driver->owner)) + if (!try_module_get(cpuidle_driver->owner)) return -EINVAL; init_completion(&dev->kobj_unregister); @@ -284,7 +285,7 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); if ((ret = cpuidle_add_sysfs(sys_dev))) { - module_put(cpuidle_curr_driver->owner); + module_put(cpuidle_driver->owner); return ret; } @@ -325,6 +326,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); void cpuidle_unregister_device(struct cpuidle_device *dev) { struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu); + struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); if (dev->registered == 0) return; @@ -340,7 +342,7 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_resume_and_unlock(); - module_put(cpuidle_curr_driver->owner); + module_put(cpuidle_driver->owner); } EXPORT_SYMBOL_GPL(cpuidle_unregister_device); diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index 9476ba33ee2c..33e50d556f17 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -9,7 +9,6 @@ /* For internal use only */ extern struct cpuidle_governor *cpuidle_curr_governor; -extern struct cpuidle_driver *cpuidle_curr_driver; extern struct list_head cpuidle_governors; extern struct list_head cpuidle_detected_devices; extern struct mutex cpuidle_lock; diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 2257004fe33d..fd1601e3d125 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -14,7 +14,7 @@ #include "cpuidle.h" -struct cpuidle_driver *cpuidle_curr_driver; +static struct cpuidle_driver *cpuidle_curr_driver; DEFINE_SPINLOCK(cpuidle_driver_lock); /** @@ -39,14 +39,26 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) EXPORT_SYMBOL_GPL(cpuidle_register_driver); +/** + * cpuidle_get_driver - return the current driver + */ +struct cpuidle_driver *cpuidle_get_driver(void) +{ + return cpuidle_curr_driver; +} +EXPORT_SYMBOL_GPL(cpuidle_get_driver); + /** * cpuidle_unregister_driver - unregisters a driver * @drv: the driver */ void cpuidle_unregister_driver(struct cpuidle_driver *drv) { - if (!drv) + if (drv != cpuidle_curr_driver) { + WARN(1, "invalid cpuidle_unregister_driver(%s)\n", + drv->name); return; + } spin_lock(&cpuidle_driver_lock); cpuidle_curr_driver = NULL; diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 0ba9c8b8ee74..0310ffaec9df 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -47,10 +47,11 @@ static ssize_t show_current_driver(struct sysdev_class *class, char *buf) { ssize_t ret; + struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); spin_lock(&cpuidle_driver_lock); - if (cpuidle_curr_driver) - ret = sprintf(buf, "%s\n", cpuidle_curr_driver->name); + if (cpuidle_driver) + ret = sprintf(buf, "%s\n", cpuidle_driver->name); else ret = sprintf(buf, "none\n"); spin_unlock(&cpuidle_driver_lock); diff --git a/drivers/idle/Kconfig b/drivers/idle/Kconfig index f15e90a453d1..fb5c5186d4aa 100644 --- a/drivers/idle/Kconfig +++ b/drivers/idle/Kconfig @@ -1,3 +1,14 @@ +config INTEL_IDLE + tristate "Cpuidle Driver for Intel Processors" + depends on CPU_IDLE + depends on X86 + depends on CPU_SUP_INTEL + depends on EXPERIMENTAL + help + Enable intel_idle, a cpuidle driver that includes knowledge of + native Intel hardware idle features. The acpi_idle driver + can be configured at the same time, in order to handle + processors intel_idle does not support. menu "Memory power savings" depends on X86_64 diff --git a/drivers/idle/Makefile b/drivers/idle/Makefile index 5f68fc377e21..23d295cf10f2 100644 --- a/drivers/idle/Makefile +++ b/drivers/idle/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_I7300_IDLE) += i7300_idle.o +obj-$(CONFIG_INTEL_IDLE) += intel_idle.o diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c new file mode 100755 index 000000000000..54f0fb4cd5d2 --- /dev/null +++ b/drivers/idle/intel_idle.c @@ -0,0 +1,461 @@ +/* + * intel_idle.c - native hardware idle loop for modern Intel processors + * + * Copyright (c) 2010, Intel Corporation. + * Len Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * intel_idle is a cpuidle driver that loads on specific Intel processors + * in lieu of the legacy ACPI processor_idle driver. The intent is to + * make Linux more efficient on these processors, as intel_idle knows + * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs. + */ + +/* + * Design Assumptions + * + * All CPUs have same idle states as boot CPU + * + * Chipset BM_STS (bus master status) bit is a NOP + * for preventing entry into deep C-stats + */ + +/* + * Known limitations + * + * The driver currently initializes for_each_online_cpu() upon modprobe. + * It it unaware of subsequent processors hot-added to the system. + * This means that if you boot with maxcpus=n and later online + * processors above n, those processors will use C1 only. + * + * ACPI has a .suspend hack to turn off deep c-statees during suspend + * to avoid complications with the lapic timer workaround. + * Have not seen issues with suspend, but may need same workaround here. + * + * There is currently no kernel-based automatic probing/loading mechanism + * if the driver is built as a module. + */ + +/* un-comment DEBUG to enable pr_debug() statements */ +#define DEBUG + +#include +#include +#include +#include /* ktime_get_real() */ +#include +#include + +#define INTEL_IDLE_VERSION "0.4" +#define PREFIX "intel_idle: " + +#define MWAIT_SUBSTATE_MASK (0xf) +#define MWAIT_CSTATE_MASK (0xf) +#define MWAIT_SUBSTATE_SIZE (4) +#define MWAIT_MAX_NUM_CSTATES 8 +#define CPUID_MWAIT_LEAF (5) +#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) +#define CPUID5_ECX_INTERRUPT_BREAK (0x2) + +static struct cpuidle_driver intel_idle_driver = { + .name = "intel_idle", + .owner = THIS_MODULE, +}; +/* intel_idle.max_cstate=0 disables driver */ +static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; +static int power_policy = 7; /* 0 = max perf; 15 = max powersave */ + +static unsigned int substates; +static int (*choose_substate)(int); + +/* Reliable LAPIC Timer States, bit 1 for C1 etc. */ +static unsigned int lapic_timer_reliable_states; + +static struct cpuidle_device *intel_idle_cpuidle_devices; +static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state); + +static struct cpuidle_state *cpuidle_state_table; + +/* + * States are indexed by the cstate number, + * which is also the index into the MWAIT hint array. + * Thus C0 is a dummy. + */ +static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { + { /* MWAIT C0 */ }, + { /* MWAIT C1 */ + .name = "NHM-C1", + .desc = "MWAIT 0x00", + .driver_data = (void *) 0x00, + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 3, + .power_usage = 1000, + .target_residency = 6, + .enter = &intel_idle }, + { /* MWAIT C2 */ + .name = "NHM-C3", + .desc = "MWAIT 0x10", + .driver_data = (void *) 0x10, + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 20, + .power_usage = 500, + .target_residency = 80, + .enter = &intel_idle }, + { /* MWAIT C3 */ + .name = "NHM-C6", + .desc = "MWAIT 0x20", + .driver_data = (void *) 0x20, + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 200, + .power_usage = 350, + .target_residency = 800, + .enter = &intel_idle }, +}; + +static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { + { /* MWAIT C0 */ }, + { /* MWAIT C1 */ + .name = "ATM-C1", + .desc = "MWAIT 0x00", + .driver_data = (void *) 0x00, + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 1, + .power_usage = 1000, + .target_residency = 4, + .enter = &intel_idle }, + { /* MWAIT C2 */ + .name = "ATM-C2", + .desc = "MWAIT 0x10", + .driver_data = (void *) 0x10, + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 20, + .power_usage = 500, + .target_residency = 80, + .enter = &intel_idle }, + { /* MWAIT C3 */ }, + { /* MWAIT C4 */ + .name = "ATM-C4", + .desc = "MWAIT 0x30", + .driver_data = (void *) 0x30, + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 100, + .power_usage = 250, + .target_residency = 400, + .enter = &intel_idle }, + { /* MWAIT C5 */ }, + { /* MWAIT C6 */ + .name = "ATM-C6", + .desc = "MWAIT 0x40", + .driver_data = (void *) 0x40, + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 200, + .power_usage = 150, + .target_residency = 800, + .enter = NULL }, /* disabled */ +}; + +/* + * choose_tunable_substate() + * + * Run-time decision on which C-state substate to invoke + * If power_policy = 0, choose shallowest substate (0) + * If power_policy = 15, choose deepest substate + * If power_policy = middle, choose middle substate etc. + */ +static int choose_tunable_substate(int cstate) +{ + unsigned int num_substates; + unsigned int substate_choice; + + power_policy &= 0xF; /* valid range: 0-15 */ + cstate &= 7; /* valid range: 0-7 */ + + num_substates = (substates >> ((cstate) * 4)) & MWAIT_SUBSTATE_MASK; + + if (num_substates <= 1) + return 0; + + substate_choice = ((power_policy + (power_policy + 1) * + (num_substates - 1)) / 16); + + return substate_choice; +} + +/* + * choose_zero_substate() + */ +static int choose_zero_substate(int cstate) +{ + return 0; +} + +/** + * intel_idle + * @dev: cpuidle_device + * @state: cpuidle state + * + */ +static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) +{ + unsigned long ecx = 1; /* break on interrupt flag */ + unsigned long eax = (unsigned long)cpuidle_get_statedata(state); + unsigned int cstate; + ktime_t kt_before, kt_after; + s64 usec_delta; + int cpu = smp_processor_id(); + + cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; + + eax = eax + (choose_substate)(cstate); + + local_irq_disable(); + + if (!(lapic_timer_reliable_states & (1 << (cstate)))) + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + + kt_before = ktime_get_real(); + + stop_critical_timings(); +#ifndef MODULE + trace_power_start(POWER_CSTATE, (eax >> 4) + 1); +#endif + if (!need_resched()) { + + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __mwait(eax, ecx); + } + + start_critical_timings(); + + kt_after = ktime_get_real(); + usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before)); + + local_irq_enable(); + + if (!(lapic_timer_reliable_states & (1 << (cstate)))) + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + + return usec_delta; +} + +/* + * intel_idle_probe() + */ +static int intel_idle_probe(void) +{ + unsigned int eax, ebx, ecx, edx; + + if (max_cstate == 0) { + pr_debug(PREFIX "disabled\n"); + return -EPERM; + } + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return -ENODEV; + + if (!boot_cpu_has(X86_FEATURE_MWAIT)) + return -ENODEV; + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return -ENODEV; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); + + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || + !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) + return -ENODEV; +#ifdef DEBUG + if (substates == 0) /* can over-ride via modparam */ +#endif + substates = edx; + + pr_debug(PREFIX "MWAIT substates: 0x%x\n", substates); + + if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ + lapic_timer_reliable_states = 0xFFFFFFFF; + + if (boot_cpu_data.x86 != 6) /* family 6 */ + return -ENODEV; + + switch (boot_cpu_data.x86_model) { + + case 0x1A: /* Core i7, Xeon 5500 series */ + case 0x1E: /* Core i7 and i5 Processor - Lynnfield Jasper Forest */ + case 0x1F: /* Core i7 and i5 Processor - Nehalem */ + case 0x2E: /* Nehalem-EX Xeon */ + lapic_timer_reliable_states = (1 << 1); /* C1 */ + + case 0x25: /* Westmere */ + case 0x2C: /* Westmere */ + cpuidle_state_table = nehalem_cstates; + choose_substate = choose_tunable_substate; + break; + + case 0x1C: /* 28 - Atom Processor */ + lapic_timer_reliable_states = (1 << 2) | (1 << 1); /* C2, C1 */ + cpuidle_state_table = atom_cstates; + choose_substate = choose_zero_substate; + break; +#ifdef FUTURE_USE + case 0x17: /* 23 - Core 2 Duo */ + lapic_timer_reliable_states = (1 << 2) | (1 << 1); /* C2, C1 */ +#endif + + default: + pr_debug(PREFIX "does not run on family %d model %d\n", + boot_cpu_data.x86, boot_cpu_data.x86_model); + return -ENODEV; + } + + pr_debug(PREFIX "v" INTEL_IDLE_VERSION + " model 0x%X\n", boot_cpu_data.x86_model); + + pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n", + lapic_timer_reliable_states); + return 0; +} + +/* + * intel_idle_cpuidle_devices_uninit() + * unregister, free cpuidle_devices + */ +static void intel_idle_cpuidle_devices_uninit(void) +{ + int i; + struct cpuidle_device *dev; + + for_each_online_cpu(i) { + dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); + cpuidle_unregister_device(dev); + } + + free_percpu(intel_idle_cpuidle_devices); + return; +} +/* + * intel_idle_cpuidle_devices_init() + * allocate, initialize, register cpuidle_devices + */ +static int intel_idle_cpuidle_devices_init(void) +{ + int i, cstate; + struct cpuidle_device *dev; + + intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); + if (intel_idle_cpuidle_devices == NULL) + return -ENOMEM; + + for_each_online_cpu(i) { + dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); + + dev->state_count = 1; + + for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { + int num_substates; + + if (cstate > max_cstate) { + printk(PREFIX "max_cstate %d reached\n", + max_cstate); + break; + } + + /* does the state exist in CPUID.MWAIT? */ + num_substates = (substates >> ((cstate) * 4)) + & MWAIT_SUBSTATE_MASK; + if (num_substates == 0) + continue; + /* is the state not enabled? */ + if (cpuidle_state_table[cstate].enter == NULL) { + /* does the driver not know about the state? */ + if (*cpuidle_state_table[cstate].name == '\0') + pr_debug(PREFIX "unaware of model 0x%x" + " MWAIT %d please" + " contact lenb@kernel.org", + boot_cpu_data.x86_model, cstate); + continue; + } + + if ((cstate > 2) && + !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halts in idle" + " states deeper than C2"); + + dev->states[dev->state_count] = /* structure copy */ + cpuidle_state_table[cstate]; + + dev->state_count += 1; + } + + dev->cpu = i; + if (cpuidle_register_device(dev)) { + pr_debug(PREFIX "cpuidle_register_device %d failed!\n", + i); + intel_idle_cpuidle_devices_uninit(); + return -EIO; + } + } + + return 0; +} + + +static int __init intel_idle_init(void) +{ + int retval; + + retval = intel_idle_probe(); + if (retval) + return retval; + + retval = cpuidle_register_driver(&intel_idle_driver); + if (retval) { + printk(KERN_DEBUG PREFIX "intel_idle yielding to %s", + cpuidle_get_driver()->name); + return retval; + } + + retval = intel_idle_cpuidle_devices_init(); + if (retval) { + cpuidle_unregister_driver(&intel_idle_driver); + return retval; + } + + return 0; +} + +static void __exit intel_idle_exit(void) +{ + intel_idle_cpuidle_devices_uninit(); + cpuidle_unregister_driver(&intel_idle_driver); + + return; +} + +module_init(intel_idle_init); +module_exit(intel_idle_exit); + +module_param(power_policy, int, 0644); +module_param(max_cstate, int, 0444); +#ifdef DEBUG +module_param(substates, int, 0444); +#endif + +MODULE_AUTHOR("Len Brown "); +MODULE_DESCRIPTION("Cpuidle driver for Intel Hardware v" INTEL_IDLE_VERSION); +MODULE_LICENSE("GPL"); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index dcf77fa826b5..55215cce5005 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -125,6 +125,7 @@ struct cpuidle_driver { #ifdef CONFIG_CPU_IDLE extern int cpuidle_register_driver(struct cpuidle_driver *drv); +struct cpuidle_driver *cpuidle_get_driver(void); extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); extern int cpuidle_register_device(struct cpuidle_device *dev); extern void cpuidle_unregister_device(struct cpuidle_device *dev); @@ -137,16 +138,17 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev); #else static inline int cpuidle_register_driver(struct cpuidle_driver *drv) -{return 0;} +{return -ENODEV; } +static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; } static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { } static inline int cpuidle_register_device(struct cpuidle_device *dev) -{return 0;} +{return -ENODEV; } static inline void cpuidle_unregister_device(struct cpuidle_device *dev) { } static inline void cpuidle_pause_and_lock(void) { } static inline void cpuidle_resume_and_unlock(void) { } static inline int cpuidle_enable_device(struct cpuidle_device *dev) -{return 0;} +{return -ENODEV; } static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } #endif