mirror of https://gitee.com/openkylin/linux.git
psi: Optimize task switch inside shared cgroups
Commit 36b238d571 ("psi: Optimize switching tasks inside shared
cgroups") updates only the cgroups whose state actually changes during
a task switch, but it does so only in the task preempt case, not in the
task sleep case.
We don't actually need to clear and set the TSK_ONCPU state for the common
cgroups of the next and prev tasks in the sleep case either. Skipping that
saves many psi_group_change() calls, especially when most activity comes
from one leaf cgroup.
sleep before:
psi_dequeue()
  while ((group = iterate_groups(prev))) # all ancestors
    psi_group_change(prev, .clear=TSK_RUNNING|TSK_ONCPU)
psi_task_switch()
  while ((group = iterate_groups(next))) # all ancestors
    psi_group_change(next, .set=TSK_ONCPU)
sleep after:
psi_dequeue()
  nop
psi_task_switch()
  while ((group = iterate_groups(next))) # until (prev & next)
    psi_group_change(next, .set=TSK_ONCPU)
  while ((group = iterate_groups(prev))) # all ancestors
    psi_group_change(prev, .clear=common?TSK_RUNNING:TSK_RUNNING|TSK_ONCPU)
When a voluntary sleep switches to another task, we remove one call of
psi_group_change() for every common cgroup ancestor of the two tasks.
Co-developed-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20210303034659.91735-5-zhouchengming@bytedance.com
This commit is contained in:
parent
fddc8bab53
commit
4117cebf1a
|
@@ -840,20 +840,35 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* If this is a voluntary sleep, dequeue will have taken care
|
|
||||||
* of the outgoing TSK_ONCPU alongside TSK_RUNNING already. We
|
|
||||||
* only need to deal with it during preemption.
|
|
||||||
*/
|
|
||||||
if (sleep)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (prev->pid) {
|
if (prev->pid) {
|
||||||
psi_flags_change(prev, TSK_ONCPU, 0);
|
int clear = TSK_ONCPU, set = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When we're going to sleep, psi_dequeue() lets us handle
|
||||||
|
* TSK_RUNNING and TSK_IOWAIT here, where we can combine it
|
||||||
|
* with TSK_ONCPU and save walking common ancestors twice.
|
||||||
|
*/
|
||||||
|
if (sleep) {
|
||||||
|
clear |= TSK_RUNNING;
|
||||||
|
if (prev->in_iowait)
|
||||||
|
set |= TSK_IOWAIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
psi_flags_change(prev, clear, set);
|
||||||
|
|
||||||
iter = NULL;
|
iter = NULL;
|
||||||
while ((group = iterate_groups(prev, &iter)) && group != common)
|
while ((group = iterate_groups(prev, &iter)) && group != common)
|
||||||
psi_group_change(group, cpu, TSK_ONCPU, 0, true);
|
psi_group_change(group, cpu, clear, set, true);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TSK_ONCPU is handled up to the common ancestor. If we're tasked
|
||||||
|
* with dequeuing too, finish that for the rest of the hierarchy.
|
||||||
|
*/
|
||||||
|
if (sleep) {
|
||||||
|
clear &= ~TSK_ONCPU;
|
||||||
|
for (; group; group = iterate_groups(prev, &iter))
|
||||||
|
psi_group_change(group, cpu, clear, set, true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -84,28 +84,24 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
|
||||||
|
|
||||||
static inline void psi_dequeue(struct task_struct *p, bool sleep)
|
static inline void psi_dequeue(struct task_struct *p, bool sleep)
|
||||||
{
|
{
|
||||||
int clear = TSK_RUNNING, set = 0;
|
int clear = TSK_RUNNING;
|
||||||
|
|
||||||
if (static_branch_likely(&psi_disabled))
|
if (static_branch_likely(&psi_disabled))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!sleep) {
|
/*
|
||||||
if (p->in_memstall)
|
* A voluntary sleep is a dequeue followed by a task switch. To
|
||||||
clear |= TSK_MEMSTALL;
|
* avoid walking all ancestors twice, psi_task_switch() handles
|
||||||
} else {
|
* TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
|
||||||
/*
|
* Do nothing here.
|
||||||
* When a task sleeps, schedule() dequeues it before
|
*/
|
||||||
* switching to the next one. Merge the clearing of
|
if (sleep)
|
||||||
* TSK_RUNNING and TSK_ONCPU to save an unnecessary
|
return;
|
||||||
* psi_task_change() call in psi_sched_switch().
|
|
||||||
*/
|
|
||||||
clear |= TSK_ONCPU;
|
|
||||||
|
|
||||||
if (p->in_iowait)
|
if (p->in_memstall)
|
||||||
set |= TSK_IOWAIT;
|
clear |= TSK_MEMSTALL;
|
||||||
}
|
|
||||||
|
|
||||||
psi_task_change(p, clear, set);
|
psi_task_change(p, clear, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void psi_ttwu_dequeue(struct task_struct *p)
|
static inline void psi_ttwu_dequeue(struct task_struct *p)
|
||||||
|
|
Loading…
Reference in New Issue