Merge branch 'timers/core' into sched/idle

Avoid heavy conflicts caused by WIP patches in drivers/cpuidle/cpuidle.c,
by merging these into a single base.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2014-02-28 13:58:25 +01:00
commit d27c8438ee
10 changed files with 283 additions and 40 deletions

View File

@ -140,12 +140,14 @@ int cpuidle_idle_call(void)
return 0;
}
trace_cpu_idle_rcuidle(next_state, dev->cpu);
broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);
if (broadcast)
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
if (broadcast &&
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
return -EBUSY;
trace_cpu_idle_rcuidle(next_state, dev->cpu);
if (cpuidle_state_is_coupled(dev, drv, next_state))
entered_state = cpuidle_enter_state_coupled(dev, drv,
@ -153,11 +155,11 @@ int cpuidle_idle_call(void)
else
entered_state = cpuidle_enter_state(dev, drv, next_state);
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
if (broadcast)
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
/* give the governor an opportunity to reflect on the outcome */
if (cpuidle_curr_governor->reflect)
cpuidle_curr_governor->reflect(dev, entered_state);

View File

@ -62,6 +62,11 @@ enum clock_event_mode {
#define CLOCK_EVT_FEAT_DYNIRQ 0x000020
#define CLOCK_EVT_FEAT_PERCPU 0x000040
/*
* Clockevent device is based on a hrtimer for broadcast
*/
#define CLOCK_EVT_FEAT_HRTIMER 0x000080
/**
* struct clock_event_device - clock event device descriptor
* @event_handler: Assigned by the framework to be called by the low
@ -83,6 +88,7 @@ enum clock_event_mode {
* @name: ptr to clock event name
* @rating: variable to rate clock event devices
* @irq: IRQ number (only for non CPU local devices)
* @bound_on: Bound on CPU
* @cpumask: cpumask to indicate for which CPUs this device works
* @list: list head for the management code
* @owner: module reference
@ -113,6 +119,7 @@ struct clock_event_device {
const char *name;
int rating;
int irq;
int bound_on;
const struct cpumask *cpumask;
struct list_head list;
struct module *owner;
@ -180,15 +187,17 @@ extern int tick_receive_broadcast(void);
#endif
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
extern void tick_setup_hrtimer_broadcast(void);
extern int tick_check_broadcast_expired(void);
#else
static inline int tick_check_broadcast_expired(void) { return 0; }
static inline void tick_setup_hrtimer_broadcast(void) {};
#endif
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern void clockevents_notify(unsigned long reason, void *arg);
extern int clockevents_notify(unsigned long reason, void *arg);
#else
static inline void clockevents_notify(unsigned long reason, void *arg) {}
static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; }
#endif
#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */
@ -196,8 +205,9 @@ static inline void clockevents_notify(unsigned long reason, void *arg) {}
static inline void clockevents_suspend(void) {}
static inline void clockevents_resume(void) {}
static inline void clockevents_notify(unsigned long reason, void *arg) {}
static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; }
static inline int tick_check_broadcast_expired(void) { return 0; }
static inline void tick_setup_hrtimer_broadcast(void) {};
#endif

View File

@ -124,7 +124,7 @@ config NO_HZ_FULL
endchoice
config NO_HZ_FULL_ALL
bool "Full dynticks system on all CPUs by default"
bool "Full dynticks system on all CPUs by default (except CPU 0)"
depends on NO_HZ_FULL
help
If the user doesn't pass the nohz_full boot option to

View File

@ -3,7 +3,10 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
obj-y += tick-broadcast.o
obj-$(CONFIG_TICK_ONESHOT) += tick-broadcast-hrtimer.o
endif
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o

View File

@ -439,6 +439,19 @@ void clockevents_config_and_register(struct clock_event_device *dev,
}
EXPORT_SYMBOL_GPL(clockevents_config_and_register);
int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
{
clockevents_config(dev, freq);
if (dev->mode == CLOCK_EVT_MODE_ONESHOT)
return clockevents_program_event(dev, dev->next_event, false);
if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
return 0;
}
/**
* clockevents_update_freq - Update frequency and reprogram a clock event device.
* @dev: device to modify
@ -446,17 +459,22 @@ EXPORT_SYMBOL_GPL(clockevents_config_and_register);
*
* Reconfigure and reprogram a clock event device in oneshot
* mode. Must be called on the cpu for which the device delivers per
* cpu timer events with interrupts disabled! Returns 0 on success,
* -ETIME when the event is in the past.
* cpu timer events. If called for the broadcast device the core takes
* care of serialization.
*
* Returns 0 on success, -ETIME when the event is in the past.
*/
int clockevents_update_freq(struct clock_event_device *dev, u32 freq)
{
clockevents_config(dev, freq);
unsigned long flags;
int ret;
if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
return 0;
return clockevents_program_event(dev, dev->next_event, false);
local_irq_save(flags);
ret = tick_broadcast_update_freq(dev, freq);
if (ret == -ENODEV)
ret = __clockevents_update_freq(dev, freq);
local_irq_restore(flags);
return ret;
}
/*
@ -524,12 +542,13 @@ void clockevents_resume(void)
#ifdef CONFIG_GENERIC_CLOCKEVENTS
/**
* clockevents_notify - notification about relevant events
* Returns 0 on success, any other value on error
*/
void clockevents_notify(unsigned long reason, void *arg)
int clockevents_notify(unsigned long reason, void *arg)
{
struct clock_event_device *dev, *tmp;
unsigned long flags;
int cpu;
int cpu, ret = 0;
raw_spin_lock_irqsave(&clockevents_lock, flags);
@ -542,7 +561,7 @@ void clockevents_notify(unsigned long reason, void *arg)
case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
tick_broadcast_oneshot_control(reason);
ret = tick_broadcast_oneshot_control(reason);
break;
case CLOCK_EVT_NOTIFY_CPU_DYING:
@ -585,6 +604,7 @@ void clockevents_notify(unsigned long reason, void *arg)
break;
}
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(clockevents_notify);

View File

@ -514,12 +514,13 @@ static void sync_cmos_clock(struct work_struct *work)
next.tv_sec++;
next.tv_nsec -= NSEC_PER_SEC;
}
schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
queue_delayed_work(system_power_efficient_wq,
&sync_cmos_work, timespec_to_jiffies(&next));
}
void ntp_notify_cmos_timer(void)
{
schedule_delayed_work(&sync_cmos_work, 0);
queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
}
#else

View File

@ -0,0 +1,106 @@
/*
* linux/kernel/time/tick-broadcast-hrtimer.c
* This file emulates a local clock event device
* via a pseudo clock device.
*/
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/clockchips.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>
#include "tick-internal.h"
static struct hrtimer bctimer;
static void bc_set_mode(enum clock_event_mode mode,
struct clock_event_device *bc)
{
switch (mode) {
case CLOCK_EVT_MODE_SHUTDOWN:
/*
* Note, we cannot cancel the timer here as we might
* run into the following live lock scenario:
*
* cpu 0 cpu1
* lock(broadcast_lock);
* hrtimer_interrupt()
* bc_handler()
* tick_handle_oneshot_broadcast();
* lock(broadcast_lock);
* hrtimer_cancel()
* wait_for_callback()
*/
hrtimer_try_to_cancel(&bctimer);
break;
default:
break;
}
}
/*
* This is called from the guts of the broadcast code when the cpu
* which is about to enter idle has the earliest broadcast timer event.
*/
static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
{
/*
* We try to cancel the timer first. If the callback is on
* flight on some other cpu then we let it handle it. If we
* were able to cancel the timer nothing can rearm it as we
* own broadcast_lock.
*
* However we can also be called from the event handler of
* ce_broadcast_hrtimer itself when it expires. We cannot
* restart the timer because we are in the callback, but we
* can set the expiry time and let the callback return
* HRTIMER_RESTART.
*/
if (hrtimer_try_to_cancel(&bctimer) >= 0) {
hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED);
/* Bind the "device" to the cpu */
bc->bound_on = smp_processor_id();
} else if (bc->bound_on == smp_processor_id()) {
hrtimer_set_expires(&bctimer, expires);
}
return 0;
}
static struct clock_event_device ce_broadcast_hrtimer = {
.set_mode = bc_set_mode,
.set_next_ktime = bc_set_next,
.features = CLOCK_EVT_FEAT_ONESHOT |
CLOCK_EVT_FEAT_KTIME |
CLOCK_EVT_FEAT_HRTIMER,
.rating = 0,
.bound_on = -1,
.min_delta_ns = 1,
.max_delta_ns = KTIME_MAX,
.min_delta_ticks = 1,
.max_delta_ticks = ULONG_MAX,
.mult = 1,
.shift = 0,
.cpumask = cpu_all_mask,
};
static enum hrtimer_restart bc_handler(struct hrtimer *t)
{
ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX)
return HRTIMER_NORESTART;
return HRTIMER_RESTART;
}
void tick_setup_hrtimer_broadcast(void)
{
hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
bctimer.function = bc_handler;
clockevents_register_device(&ce_broadcast_hrtimer);
}

View File

@ -120,6 +120,19 @@ int tick_is_broadcast_device(struct clock_event_device *dev)
return (dev && tick_broadcast_device.evtdev == dev);
}
int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
int ret = -ENODEV;
if (tick_is_broadcast_device(dev)) {
raw_spin_lock(&tick_broadcast_lock);
ret = __clockevents_update_freq(dev, freq);
raw_spin_unlock(&tick_broadcast_lock);
}
return ret;
}
static void err_broadcast(const struct cpumask *mask)
{
pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
@ -272,12 +285,8 @@ static void tick_do_broadcast(struct cpumask *mask)
*/
static void tick_do_periodic_broadcast(void)
{
raw_spin_lock(&tick_broadcast_lock);
cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
tick_do_broadcast(tmpmask);
raw_spin_unlock(&tick_broadcast_lock);
}
/*
@ -287,13 +296,15 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
ktime_t next;
raw_spin_lock(&tick_broadcast_lock);
tick_do_periodic_broadcast();
/*
* The device is in periodic mode. No reprogramming necessary:
*/
if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
return;
goto unlock;
/*
* Setup the next period for devices, which do not have
@ -306,9 +317,11 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
next = ktime_add(next, tick_period);
if (!clockevents_program_event(dev, next, false))
return;
goto unlock;
tick_do_periodic_broadcast();
}
unlock:
raw_spin_unlock(&tick_broadcast_lock);
}
/*
@ -630,24 +643,61 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
raw_spin_unlock(&tick_broadcast_lock);
}
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
return 0;
if (bc->next_event.tv64 == KTIME_MAX)
return 0;
return bc->bound_on == cpu ? -EBUSY : 0;
}
static void broadcast_shutdown_local(struct clock_event_device *bc,
struct clock_event_device *dev)
{
/*
* For hrtimer based broadcasting we cannot shutdown the cpu
* local device if our own event is the first one to expire or
* if we own the broadcast timer.
*/
if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
if (broadcast_needs_cpu(bc, smp_processor_id()))
return;
if (dev->next_event.tv64 < bc->next_event.tv64)
return;
}
clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
}
static void broadcast_move_bc(int deadcpu)
{
struct clock_event_device *bc = tick_broadcast_device.evtdev;
if (!bc || !broadcast_needs_cpu(bc, deadcpu))
return;
/* This moves the broadcast assignment to this cpu */
clockevents_program_event(bc, bc->next_event, 1);
}
/*
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop
* Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
*/
void tick_broadcast_oneshot_control(unsigned long reason)
int tick_broadcast_oneshot_control(unsigned long reason)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
unsigned long flags;
ktime_t now;
int cpu;
int cpu, ret = 0;
/*
* Periodic mode does not care about the enter/exit of power
* states
*/
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
return;
return 0;
/*
* We are called with preemtion disabled from the depth of the
@ -658,7 +708,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
dev = td->evtdev;
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
return;
return 0;
bc = tick_broadcast_device.evtdev;
@ -666,7 +716,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
broadcast_shutdown_local(bc, dev);
/*
* We only reprogram the broadcast timer if we
* did not mark ourself in the force mask and
@ -679,6 +729,16 @@ void tick_broadcast_oneshot_control(unsigned long reason)
dev->next_event.tv64 < bc->next_event.tv64)
tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
}
/*
* If the current CPU owns the hrtimer broadcast
* mechanism, it cannot go deep idle and we remove the
* CPU from the broadcast mask. We don't have to go
* through the EXIT path as the local timer is not
* shutdown.
*/
ret = broadcast_needs_cpu(bc, cpu);
if (ret)
cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
} else {
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
@ -746,6 +806,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
}
out:
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return ret;
}
/*
@ -852,6 +913,8 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
broadcast_move_bc(cpu);
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

View File

@ -46,7 +46,7 @@ extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
extern void tick_resume_oneshot(void);
# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
extern void tick_broadcast_oneshot_control(unsigned long reason);
extern int tick_broadcast_oneshot_control(unsigned long reason);
extern void tick_broadcast_switch_to_oneshot(void);
extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
@ -58,7 +58,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
static inline int tick_broadcast_oneshot_active(void) { return 0; }
@ -87,7 +87,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
BUG();
}
static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
@ -111,6 +111,7 @@ extern int tick_resume_broadcast(void);
extern void tick_broadcast_init(void);
extern void
tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
#else /* !BROADCAST */
@ -133,6 +134,8 @@ static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
static inline void tick_suspend_broadcast(void) { }
static inline int tick_resume_broadcast(void) { return 0; }
static inline void tick_broadcast_init(void) { }
static inline int tick_broadcast_update_freq(struct clock_event_device *dev,
u32 freq) { return -ENODEV; }
/*
* Set the periodic handler in non broadcast mode
@ -152,6 +155,8 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
}
int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
#endif
extern void do_timer(unsigned long ticks);

View File

@ -81,6 +81,7 @@ struct tvec_base {
unsigned long timer_jiffies;
unsigned long next_timer;
unsigned long active_timers;
unsigned long all_timers;
struct tvec_root tv1;
struct tvec tv2;
struct tvec tv3;
@ -337,6 +338,20 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
}
EXPORT_SYMBOL_GPL(set_timer_slack);
/*
* If the list is empty, catch up ->timer_jiffies to the current time.
* The caller must hold the tvec_base lock. Returns true if the list
* was empty and therefore ->timer_jiffies was updated.
*/
static bool catchup_timer_jiffies(struct tvec_base *base)
{
if (!base->all_timers) {
base->timer_jiffies = jiffies;
return true;
}
return false;
}
static void
__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
@ -383,15 +398,17 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
(void)catchup_timer_jiffies(base);
__internal_add_timer(base, timer);
/*
* Update base->active_timers and base->next_timer
*/
if (!tbase_get_deferrable(timer->base)) {
if (time_before(timer->expires, base->next_timer))
if (!base->active_timers++ ||
time_before(timer->expires, base->next_timer))
base->next_timer = timer->expires;
base->active_timers++;
}
base->all_timers++;
}
#ifdef CONFIG_TIMER_STATS
@ -671,6 +688,8 @@ detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
detach_timer(timer, true);
if (!tbase_get_deferrable(timer->base))
base->active_timers--;
base->all_timers--;
(void)catchup_timer_jiffies(base);
}
static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
@ -685,6 +704,8 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
if (timer->expires == base->next_timer)
base->next_timer = base->timer_jiffies;
}
base->all_timers--;
(void)catchup_timer_jiffies(base);
return 1;
}
@ -939,8 +960,15 @@ void add_timer_on(struct timer_list *timer, int cpu)
* with the timer by holding the timer base lock. This also
* makes sure that a CPU on the way to stop its tick can not
* evaluate the timer wheel.
*
* Spare the IPI for deferrable timers on idle targets though.
* The next busy ticks will take care of it. Except full dynticks
* require special care against races with idle_cpu(), lets deal
* with that later.
*/
wake_up_nohz_cpu(cpu);
if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(cpu))
wake_up_nohz_cpu(cpu);
spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);
@ -1146,6 +1174,10 @@ static inline void __run_timers(struct tvec_base *base)
struct timer_list *timer;
spin_lock_irq(&base->lock);
if (catchup_timer_jiffies(base)) {
spin_unlock_irq(&base->lock);
return;
}
while (time_after_eq(jiffies, base->timer_jiffies)) {
struct list_head work_list;
struct list_head *head = &work_list;
@ -1559,6 +1591,7 @@ static int init_timers_cpu(int cpu)
base->timer_jiffies = jiffies;
base->next_timer = base->timer_jiffies;
base->active_timers = 0;
base->all_timers = 0;
return 0;
}