diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 005c0ca81a32..2100aad6b5f2 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -393,6 +393,7 @@ int tick_resume_broadcast(void)
 
 static cpumask_var_t tick_broadcast_oneshot_mask;
 static cpumask_var_t tick_broadcast_pending_mask;
+static cpumask_var_t tick_broadcast_force_mask;
 
 /*
  * Exposed for debugging: see timer_list.c
@@ -483,6 +484,10 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 		}
 	}
 
+	/* Take care of enforced broadcast requests */
+	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
+	cpumask_clear(tick_broadcast_force_mask);
+
 	/*
 	 * Wakeup the cpus which have an expired event.
 	 */
@@ -518,6 +523,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
 	struct clock_event_device *bc, *dev;
 	struct tick_device *td;
 	unsigned long flags;
+	ktime_t now;
 	int cpu;
 
 	/*
@@ -545,7 +551,16 @@ void tick_broadcast_oneshot_control(unsigned long reason)
 		WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
-			if (dev->next_event.tv64 < bc->next_event.tv64)
+			/*
+			 * We only reprogram the broadcast timer if we
+			 * did not mark ourselves in the force mask and
+			 * if the cpu local event is earlier than the
+			 * broadcast event. If the current CPU is in
+			 * the force mask, then we are going to be
+			 * woken by the IPI right away.
+			 */
+			if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
+			    dev->next_event.tv64 < bc->next_event.tv64)
 				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
 		}
 	} else {
@@ -566,6 +581,47 @@
 				       tick_broadcast_pending_mask))
 				goto out;
 
+			/*
+			 * If the pending bit is not set, then we are
+			 * either the CPU handling the broadcast
+			 * interrupt or we got woken by something else.
+			 *
+			 * We are no longer in the broadcast mask, so
+			 * if the cpu local expiry time is already
+			 * reached, we would reprogram the cpu local
+			 * timer with an already expired event.
+			 *
+			 * This can lead to a ping-pong when we return
+			 * to idle and therefore rearm the broadcast
+			 * timer before the cpu local timer was able
+			 * to fire. This happens because the forced
+			 * reprogramming makes sure that the event
+			 * will happen in the future and depending on
+			 * the min_delta setting this might be far
+			 * enough out that the ping-pong starts.
+			 *
+			 * If the cpu local next_event has expired
+			 * then we know that the broadcast timer
+			 * next_event has expired as well and
+			 * broadcast is about to be handled. So we
+			 * avoid reprogramming and enforce that the
+			 * broadcast handler, which did not run yet,
+			 * will invoke the cpu local handler.
+			 *
+			 * We cannot call the handler directly from
+			 * here, because we might be in a NOHZ phase
+			 * and we did not go through the irq_enter()
+			 * nohz fixups.
+			 */
+			now = ktime_get();
+			if (dev->next_event.tv64 <= now.tv64) {
+				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
+				goto out;
+			}
+			/*
+			 * We got woken by something else. Reprogram
+			 * the cpu local timer device.
+			 */
 			tick_program_event(dev->next_event, 1);
 		}
 	}
@@ -707,5 +763,6 @@ void __init tick_broadcast_init(void)
 #ifdef CONFIG_TICK_ONESHOT
 	alloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
 	alloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
 #endif
 }