sched/deadline: Fix the intention to re-evalute tick dependency for offline CPU
The dl task will be replenished after dl task timer fire and start a new period. It will be enqueued and to re-evaluate its dependency on the tick in order to restart it. However, if the CPU is hot-unplugged, irq_work_queue will splash since the target CPU is offline. As a result we get: WARNING: CPU: 2 PID: 0 at kernel/irq_work.c:69 irq_work_queue_on+0xad/0xe0 Call Trace: dump_stack+0x99/0xd0 __warn+0xd1/0xf0 warn_slowpath_null+0x1d/0x20 irq_work_queue_on+0xad/0xe0 tick_nohz_full_kick_cpu+0x44/0x50 tick_nohz_dep_set_cpu+0x74/0xb0 enqueue_task_dl+0x226/0x480 activate_task+0x5c/0xa0 dl_task_timer+0x19b/0x2c0 ? push_dl_task.part.31+0x190/0x190 This can be triggered by hot-unplugging the full dynticks CPU which dl task is running on. We enqueue the dl task on the offline CPU, because we need to do replenish for start_dl_timer(). So, as Juri pointed out, we would need to do is calling replenish_dl_entity() directly, instead of enqueue_task_dl(). pi_se shouldn't be a problem as the task shouldn't be boosted if it was throttled. This patch fixes it by avoiding the whole enqueue+dequeue+enqueue story, by first migrating (set_task_cpu()) and then doing 1 enqueue. Suggested-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Juri Lelli <juri.lelli@arm.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Luca Abeni <luca.abeni@unitn.it> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/1472639264-3932-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
efca03ecbe
commit
61c7aca695
|
@ -243,10 +243,8 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
|
|||
static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
struct rq *later_rq = NULL;
|
||||
bool fallback = false;
|
||||
|
||||
later_rq = find_lock_later_rq(p, rq);
|
||||
|
||||
if (!later_rq) {
|
||||
int cpu;
|
||||
|
||||
|
@ -254,7 +252,6 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
|
|||
* If we cannot preempt any rq, fall back to pick any
|
||||
* online cpu.
|
||||
*/
|
||||
fallback = true;
|
||||
cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
|
||||
if (cpu >= nr_cpu_ids) {
|
||||
/*
|
||||
|
@ -274,16 +271,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
|
|||
double_lock_balance(rq, later_rq);
|
||||
}
|
||||
|
||||
/*
|
||||
* By now the task is replenished and enqueued; migrate it.
|
||||
*/
|
||||
deactivate_task(rq, p, 0);
|
||||
set_task_cpu(p, later_rq->cpu);
|
||||
activate_task(later_rq, p, 0);
|
||||
|
||||
if (!fallback)
|
||||
resched_curr(later_rq);
|
||||
|
||||
double_unlock_balance(later_rq, rq);
|
||||
|
||||
return later_rq;
|
||||
|
@ -641,6 +629,24 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
|
|||
goto unlock;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
if (unlikely(!rq->online)) {
|
||||
/*
|
||||
* If the runqueue is no longer available, migrate the
|
||||
* task elsewhere. This necessarily changes rq.
|
||||
*/
|
||||
lockdep_unpin_lock(&rq->lock, rf.cookie);
|
||||
rq = dl_task_offline_migration(rq, p);
|
||||
rf.cookie = lockdep_pin_lock(&rq->lock);
|
||||
|
||||
/*
|
||||
* Now that the task has been migrated to the new RQ and we
|
||||
* have that locked, proceed as normal and enqueue the task
|
||||
* there.
|
||||
*/
|
||||
}
|
||||
#endif
|
||||
|
||||
enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
|
||||
if (dl_task(rq->curr))
|
||||
check_preempt_curr_dl(rq, p, 0);
|
||||
|
@ -648,22 +654,6 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
|
|||
resched_curr(rq);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Perform balancing operations here; after the replenishments. We
|
||||
* cannot drop rq->lock before this, otherwise the assertion in
|
||||
* start_dl_timer() about not missing updates is not true.
|
||||
*
|
||||
* If we find that the rq the task was on is no longer available, we
|
||||
* need to select a new rq.
|
||||
*
|
||||
* XXX figure out if select_task_rq_dl() deals with offline cpus.
|
||||
*/
|
||||
if (unlikely(!rq->online)) {
|
||||
lockdep_unpin_lock(&rq->lock, rf.cookie);
|
||||
rq = dl_task_offline_migration(rq, p);
|
||||
rf.cookie = lockdep_pin_lock(&rq->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Queueing this task back might have overloaded rq, check if we need
|
||||
* to kick someone away.
|
||||
|
|
Loading…
Reference in New Issue