mirror of https://gitee.com/openkylin/linux.git
sched/core: Fix task and run queue sched_info::run_delay inconsistencies
Mike Meyer reported the following bug: > During evaluation of some performance data, it was discovered thread > and run queue run_delay accounting data was inconsistent with the other > accounting data that was collected. Further investigation found under > certain circumstances execution time was leaking into the task and > run queue accounting of run_delay. > > Consider the following sequence: > > a. thread is running. > b. thread moves between cgroups, changes scheduling class or priority. > c. thread sleeps OR > d. thread involuntarily gives up cpu. > > a. implies: > > thread->sched_info.last_queued = 0 > > a. and b. results in the following: > > 1. dequeue_task(rq, thread) > > sched_info_dequeued(rq, thread) > delta = 0 > > sched_info_reset_dequeued(thread) > thread->sched_info.last_queued = 0 > > thread->sched_info.run_delay += delta > > 2. enqueue_task(rq, thread) > > sched_info_queued(rq, thread) > > /* thread is still on cpu at this point. */ > thread->sched_info.last_queued = task_rq(thread)->clock; > > c. results in: > > dequeue_task(rq, thread) > > sched_info_dequeued(rq, thread) > > /* delta is execution time not run_delay. */ > delta = task_rq(thread)->clock - thread->sched_info.last_queued > > sched_info_reset_dequeued(thread) > thread->sched_info.last_queued = 0 > > thread->sched_info.run_delay += delta > > Since thread was running between enqueue_task(rq, thread) and > dequeue_task(rq, thread), the delta above is really execution > time and not run_delay. > > d. results in: > > __sched_info_switch(thread, next_thread) > > sched_info_depart(rq, thread) > > sched_info_queued(rq, thread) > > /* last_queued not updated due to being non-zero */ > return > > Since thread was running between enqueue_task(rq, thread) and > __sched_info_switch(thread, next_thread), the execution time > between enqueue_task(rq, thread) and > __sched_info_switch(thread, next_thread) now will become > associated with run_delay due to when last_queued was last updated.
This alternative patch solves the problem by not calling sched_info_{de,}queued() in {de,en}queue_task(). Therefore the sched_info state is preserved and things work as expected. By inlining the {de,en}queue_task() functions the new condition becomes (mostly) a compile-time constant and we'll not emit any new branch instructions. It even shrinks the code (due to inlining {en,de}queue_task()): $ size defconfig-build/kernel/sched/core.o defconfig-build/kernel/sched/core.o.orig text data bss dec hex filename 64019 23378 2344 89741 15e8d defconfig-build/kernel/sched/core.o 64149 23378 2344 89871 15f0f defconfig-build/kernel/sched/core.o.orig Reported-by: Mike Meyer <Mike.Meyer@Teradata.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20150930154413.GO3604@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
b52da86e0a
commit
1de64443d7
|
@ -827,17 +827,19 @@ static void set_load_weight(struct task_struct *p)
|
|||
load->inv_weight = prio_to_wmult[prio];
|
||||
}
|
||||
|
||||
static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
update_rq_clock(rq);
|
||||
sched_info_queued(rq, p);
|
||||
if (!(flags & ENQUEUE_RESTORE))
|
||||
sched_info_queued(rq, p);
|
||||
p->sched_class->enqueue_task(rq, p, flags);
|
||||
}
|
||||
|
||||
static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
update_rq_clock(rq);
|
||||
sched_info_dequeued(rq, p);
|
||||
if (!(flags & DEQUEUE_SAVE))
|
||||
sched_info_dequeued(rq, p);
|
||||
p->sched_class->dequeue_task(rq, p, flags);
|
||||
}
|
||||
|
||||
|
@ -1178,7 +1180,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
|
|||
* holding rq->lock.
|
||||
*/
|
||||
lockdep_assert_held(&rq->lock);
|
||||
dequeue_task(rq, p, 0);
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
}
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
@ -1188,7 +1190,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
|
|||
if (running)
|
||||
p->sched_class->set_curr_task(rq);
|
||||
if (queued)
|
||||
enqueue_task(rq, p, 0);
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1692,7 +1694,7 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
|
|||
#endif /* CONFIG_SCHEDSTATS */
|
||||
}
|
||||
|
||||
static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
|
||||
static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
|
||||
{
|
||||
activate_task(rq, p, en_flags);
|
||||
p->on_rq = TASK_ON_RQ_QUEUED;
|
||||
|
@ -3325,7 +3327,7 @@ EXPORT_SYMBOL(default_wake_function);
|
|||
*/
|
||||
void rt_mutex_setprio(struct task_struct *p, int prio)
|
||||
{
|
||||
int oldprio, queued, running, enqueue_flag = 0;
|
||||
int oldprio, queued, running, enqueue_flag = ENQUEUE_RESTORE;
|
||||
struct rq *rq;
|
||||
const struct sched_class *prev_class;
|
||||
|
||||
|
@ -3357,7 +3359,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
|||
queued = task_on_rq_queued(p);
|
||||
running = task_current(rq, p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, 0);
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
|
@ -3375,7 +3377,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
|||
if (!dl_prio(p->normal_prio) ||
|
||||
(pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
|
||||
p->dl.dl_boosted = 1;
|
||||
enqueue_flag = ENQUEUE_REPLENISH;
|
||||
enqueue_flag |= ENQUEUE_REPLENISH;
|
||||
} else
|
||||
p->dl.dl_boosted = 0;
|
||||
p->sched_class = &dl_sched_class;
|
||||
|
@ -3383,7 +3385,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
|||
if (dl_prio(oldprio))
|
||||
p->dl.dl_boosted = 0;
|
||||
if (oldprio < prio)
|
||||
enqueue_flag = ENQUEUE_HEAD;
|
||||
enqueue_flag |= ENQUEUE_HEAD;
|
||||
p->sched_class = &rt_sched_class;
|
||||
} else {
|
||||
if (dl_prio(oldprio))
|
||||
|
@ -3435,7 +3437,7 @@ void set_user_nice(struct task_struct *p, long nice)
|
|||
}
|
||||
queued = task_on_rq_queued(p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, 0);
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
|
||||
p->static_prio = NICE_TO_PRIO(nice);
|
||||
set_load_weight(p);
|
||||
|
@ -3444,7 +3446,7 @@ void set_user_nice(struct task_struct *p, long nice)
|
|||
delta = p->prio - old_prio;
|
||||
|
||||
if (queued) {
|
||||
enqueue_task(rq, p, 0);
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE);
|
||||
/*
|
||||
* If the task increased its priority or is running and
|
||||
* lowered its priority, then reschedule its CPU:
|
||||
|
@ -3946,7 +3948,7 @@ static int __sched_setscheduler(struct task_struct *p,
|
|||
queued = task_on_rq_queued(p);
|
||||
running = task_current(rq, p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, 0);
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
|
@ -3956,11 +3958,15 @@ static int __sched_setscheduler(struct task_struct *p,
|
|||
if (running)
|
||||
p->sched_class->set_curr_task(rq);
|
||||
if (queued) {
|
||||
int enqueue_flags = ENQUEUE_RESTORE;
|
||||
/*
|
||||
* We enqueue to tail when the priority of a task is
|
||||
* increased (user space view).
|
||||
*/
|
||||
enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
|
||||
if (oldprio <= p->prio)
|
||||
enqueue_flags |= ENQUEUE_HEAD;
|
||||
|
||||
enqueue_task(rq, p, enqueue_flags);
|
||||
}
|
||||
|
||||
check_class_changed(rq, p, prev_class, oldprio);
|
||||
|
@ -5109,7 +5115,7 @@ void sched_setnuma(struct task_struct *p, int nid)
|
|||
running = task_current(rq, p);
|
||||
|
||||
if (queued)
|
||||
dequeue_task(rq, p, 0);
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
|
@ -5118,7 +5124,7 @@ void sched_setnuma(struct task_struct *p, int nid)
|
|||
if (running)
|
||||
p->sched_class->set_curr_task(rq);
|
||||
if (queued)
|
||||
enqueue_task(rq, p, 0);
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE);
|
||||
task_rq_unlock(rq, p, &flags);
|
||||
}
|
||||
#endif /* CONFIG_NUMA_BALANCING */
|
||||
|
@ -7737,7 +7743,7 @@ void sched_move_task(struct task_struct *tsk)
|
|||
queued = task_on_rq_queued(tsk);
|
||||
|
||||
if (queued)
|
||||
dequeue_task(rq, tsk, 0);
|
||||
dequeue_task(rq, tsk, DEQUEUE_SAVE);
|
||||
if (unlikely(running))
|
||||
put_prev_task(rq, tsk);
|
||||
|
||||
|
@ -7761,7 +7767,7 @@ void sched_move_task(struct task_struct *tsk)
|
|||
if (unlikely(running))
|
||||
tsk->sched_class->set_curr_task(rq);
|
||||
if (queued)
|
||||
enqueue_task(rq, tsk, 0);
|
||||
enqueue_task(rq, tsk, ENQUEUE_RESTORE);
|
||||
|
||||
task_rq_unlock(rq, tsk, &flags);
|
||||
}
|
||||
|
|
|
@ -1151,16 +1151,18 @@ static const u32 prio_to_wmult[40] = {
|
|||
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
|
||||
};
|
||||
|
||||
#define ENQUEUE_WAKEUP 1
|
||||
#define ENQUEUE_HEAD 2
|
||||
#define ENQUEUE_WAKEUP 0x01
|
||||
#define ENQUEUE_HEAD 0x02
|
||||
#ifdef CONFIG_SMP
|
||||
#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */
|
||||
#define ENQUEUE_WAKING 0x04 /* sched_class::task_waking was called */
|
||||
#else
|
||||
#define ENQUEUE_WAKING 0
|
||||
#define ENQUEUE_WAKING 0x00
|
||||
#endif
|
||||
#define ENQUEUE_REPLENISH 8
|
||||
#define ENQUEUE_REPLENISH 0x08
|
||||
#define ENQUEUE_RESTORE 0x10
|
||||
|
||||
#define DEQUEUE_SLEEP 1
|
||||
#define DEQUEUE_SLEEP 0x01
|
||||
#define DEQUEUE_SAVE 0x02
|
||||
|
||||
#define RETRY_TASK ((void *)-1UL)
|
||||
|
||||
|
|
Loading…
Reference in New Issue