Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Misc fixes: a cgroup fix, a fair-scheduler migration accounting fix, a
  cputime fix and two cpuacct cleanups"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/cpuacct: Simplify the cpuacct code
  sched/cpuacct: Rename parameter in cpuusage_write() for readability
  sched/fair: Add comments to explain select_idle_sibling()
  sched/fair: Fix fairness issue on migration
  sched/cgroup: Fix/cleanup cgroup teardown/init
  sched/cputime: Fix steal time accounting vs. CPU hotplug
commit be53f58fa0
Linus Torvalds, 2016-03-24 09:42:50 -07:00
5 changed files with 74 additions and 57 deletions

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c

@@ -5371,6 +5371,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_UP_PREPARE:
 		rq->calc_load_update = calc_load_update;
+		account_reset_rq(rq);
 		break;

 	case CPU_ONLINE:
@@ -7537,7 +7538,7 @@ void set_curr_task(int cpu, struct task_struct *p)
 /* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);

-static void free_sched_group(struct task_group *tg)
+static void sched_free_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
 	free_rt_sched_group(tg);
@@ -7563,7 +7564,7 @@ struct task_group *sched_create_group(struct task_group *parent)
 	return tg;

 err:
-	free_sched_group(tg);
+	sched_free_group(tg);
 	return ERR_PTR(-ENOMEM);
 }
@@ -7583,17 +7584,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
 }

 /* rcu callback to free various structures associated with a task group */
-static void free_sched_group_rcu(struct rcu_head *rhp)
+static void sched_free_group_rcu(struct rcu_head *rhp)
 {
 	/* now it should be safe to free those cfs_rqs */
-	free_sched_group(container_of(rhp, struct task_group, rcu));
+	sched_free_group(container_of(rhp, struct task_group, rcu));
 }

-/* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
 	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
+	call_rcu(&tg->rcu, sched_free_group_rcu);
 }

 void sched_offline_group(struct task_group *tg)
@@ -8052,31 +8052,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (IS_ERR(tg))
 		return ERR_PTR(-ENOMEM);

+	sched_online_group(tg, parent);
+
 	return &tg->css;
 }

-static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
-	struct task_group *parent = css_tg(css->parent);

-	if (parent)
-		sched_online_group(tg, parent);
-	return 0;
+	sched_offline_group(tg);
 }

 static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);

-	sched_destroy_group(tg);
-}
-
-static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
-{
-	struct task_group *tg = css_tg(css);
-
-	sched_offline_group(tg);
+	/*
+	 * Relies on the RCU grace period between css_released() and this.
+	 */
+	sched_free_group(tg);
 }

 static void cpu_cgroup_fork(struct task_struct *task)
@@ -8436,9 +8431,8 @@ static struct cftype cpu_files[] = {
 struct cgroup_subsys cpu_cgrp_subsys = {
 	.css_alloc	= cpu_cgroup_css_alloc,
+	.css_released	= cpu_cgroup_css_released,
 	.css_free	= cpu_cgroup_css_free,
-	.css_online	= cpu_cgroup_css_online,
-	.css_offline	= cpu_cgroup_css_offline,
 	.fork		= cpu_cgroup_fork,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
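
The core.c hunks above move the cpu controller to the css_alloc() / css_released() / css_free() lifecycle, relying on the RCU grace period the cgroup core guarantees between release and free. A minimal userspace sketch of that ordering; tg_model and the *_model functions are invented stand-ins for the kernel structures, and the grace period is just a comment:

#include <stdio.h>
#include <stdlib.h>

struct tg_model { int online; };

static struct tg_model *css_alloc_model(void)
{
        struct tg_model *tg = calloc(1, sizeof(*tg));

        tg->online = 1;         /* sched_online_group() happens here now */
        return tg;
}

static void css_released_model(struct tg_model *tg)
{
        tg->online = 0;         /* sched_offline_group(): stop using it */
}

static void css_free_model(struct tg_model *tg)
{
        /*
         * The cgroup core guarantees an RCU grace period between
         * release and free, so no reader still holds this pointer.
         */
        free(tg);               /* sched_free_group() */
}

int main(void)
{
        struct tg_model *tg = css_alloc_model();

        css_released_model(tg);         /* 1: offline                    */
        /* ... RCU grace period elapses here ... */
        css_free_model(tg);             /* 2: free, now safe             */
        printf("teardown: released -> grace period -> free\n");
        return 0;
}
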

--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c

@@ -145,13 +145,16 @@ static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
 }

 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			  u64 reset)
+			  u64 val)
 {
 	struct cpuacct *ca = css_ca(css);
 	int err = 0;
 	int i;

-	if (reset) {
+	/*
+	 * Only allow '0' here to do a reset.
+	 */
+	if (val) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -235,23 +238,10 @@ static struct cftype files[] = {
 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
-	int cpu;
-
-	cpu = task_cpu(tsk);

 	rcu_read_lock();

-	ca = task_ca(tsk);
-
-	while (true) {
-		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-
-		*cpuusage += cputime;
-
-		ca = parent_ca(ca);
-		if (!ca)
-			break;
-	}
+	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
+		*this_cpu_ptr(ca->cpuusage) += cputime;

 	rcu_read_unlock();
 }
@@ -260,18 +250,13 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
  *
  * Note: it's the caller that updates the account of the root cgroup.
  */
-void cpuacct_account_field(struct task_struct *p, int index, u64 val)
+void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
 {
-	struct kernel_cpustat *kcpustat;
 	struct cpuacct *ca;

 	rcu_read_lock();
-	ca = task_ca(p);
-	while (ca != &root_cpuacct) {
-		kcpustat = this_cpu_ptr(ca->cpustat);
-		kcpustat->cpustat[index] += val;
-		ca = parent_ca(ca);
-	}
+	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
+		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
 	rcu_read_unlock();
 }
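
Both simplified loops walk the cpuacct hierarchy from the task's group up to the root, charging every level. A small userspace model of that walk; cpuacct_model and its plain counter are invented stand-ins for struct cpuacct and its per-cpu cpuusage counter:

#include <stdio.h>

struct cpuacct_model {
        struct cpuacct_model *parent;
        unsigned long long cpuusage;
};

/* Same shape as the new loop: for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) */
static void charge(struct cpuacct_model *ca, unsigned long long cputime)
{
        for (; ca; ca = ca->parent)
                ca->cpuusage += cputime;
}

int main(void)
{
        struct cpuacct_model root = { 0 };
        struct cpuacct_model mid  = { .parent = &root };
        struct cpuacct_model leaf = { .parent = &mid };

        charge(&leaf, 1000);    /* a task in 'leaf' ran for 1000ns */

        /* Every level up to the root saw the charge: prints 1000 1000 1000 */
        printf("%llu %llu %llu\n", leaf.cpuusage, mid.cpuusage, root.cpuusage);
        return 0;
}
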

--- a/kernel/sched/cpuacct.h
+++ b/kernel/sched/cpuacct.h

@@ -1,7 +1,7 @@
 #ifdef CONFIG_CGROUP_CPUACCT

 extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
-extern void cpuacct_account_field(struct task_struct *p, int index, u64 val);
+extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);

 #else
@@ -10,7 +10,7 @@ static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 }

 static inline void
-cpuacct_account_field(struct task_struct *p, int index, u64 val)
+cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
 {
 }

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c

@@ -3181,17 +3181,25 @@ static inline void check_schedstat_required(void)
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	/*
-	 * Update the normalized vruntime before updating min_vruntime
-	 * through calling update_curr().
-	 */
-	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
-		se->vruntime += cfs_rq->min_vruntime;
+	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING);
+	bool curr = cfs_rq->curr == se;

 	/*
-	 * Update run-time statistics of the 'current'.
+	 * If we're the current task, we must renormalise before calling
+	 * update_curr().
 	 */
+	if (renorm && curr)
+		se->vruntime += cfs_rq->min_vruntime;
+
 	update_curr(cfs_rq);

+	/*
+	 * Otherwise, renormalise after, such that we're placed at the current
+	 * moment in time, instead of some random moment in the past.
+	 */
+	if (renorm && !curr)
+		se->vruntime += cfs_rq->min_vruntime;
+
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
@@ -3207,7 +3215,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		update_stats_enqueue(cfs_rq, se);
 		check_spread(cfs_rq, se);
 	}
-	if (se != cfs_rq->curr)
+	if (!curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
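
The two renormalisation sites above bracket update_curr() because min_vruntime only advances there. A toy calculation with made-up numbers showing why a migrated (non-current) entity must be re-based only after update_curr() has pulled min_vruntime up to "now":

#include <stdio.h>

int main(void)
{
        long long relative   = 200;     /* se->vruntime saved at dequeue    */
        long long min_before = 10000;   /* destination min_vruntime, stale  */
        long long min_after  = 10500;   /* after update_curr() has run      */

        long long stale = relative + min_before;  /* 10200: a past moment   */
        long long fresh = relative + min_after;   /* 10700: the current one */

        printf("renorm before update_curr(): %lld (unfairly favoured)\n", stale);
        printf("renorm after  update_curr(): %lld (placed at 'now')\n", fresh);
        return 0;
}

An entity placed at 10200 looks as if it has run less than everyone else and gets extra CPU time, which is the fairness issue on migration the patch fixes.
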
@@ -5071,7 +5079,19 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		return i;

 	/*
-	 * Otherwise, iterate the domains and find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an eligible idle cpu.
+	 *
+	 * A completely idle sched group at higher domains is more
+	 * desirable than an idle group at a lower level, because lower
+	 * domains have smaller groups and usually share hardware
+	 * resources which causes tasks to contend on them, e.g. x86
+	 * hyperthread siblings in the lowest domain (SMT) can contend
+	 * on the shared cpu pipeline.
+	 *
+	 * However, while we prefer idle groups at higher domains
+	 * finding an idle cpu at the lowest domain is still better than
+	 * returning 'target', which we've already established, isn't
+	 * idle.
 	 */
 	sd = rcu_dereference(per_cpu(sd_llc, target));
 	for_each_lower_domain(sd) {
@@ -5081,11 +5101,16 @@ static int select_idle_sibling(struct task_struct *p, int target)
 					tsk_cpus_allowed(p)))
 			goto next;

+		/* Ensure the entire group is idle */
 		for_each_cpu(i, sched_group_cpus(sg)) {
 			if (i == target || !idle_cpu(i))
 				goto next;
 		}

+		/*
+		 * It doesn't matter which cpu we pick, the
+		 * whole group is idle.
+		 */
 		target = cpumask_first_and(sched_group_cpus(sg),
 				tsk_cpus_allowed(p));
 		goto done;
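
The new comments describe a scan that only accepts a sched group whose CPUs are all idle, in which case any CPU in it is an equally good pick. A simplified, self-contained sketch of that inner-group check; the array-based data layout is hypothetical, standing in for the kernel's cpumasks, and it omits the skip of 'target' itself:

#include <stdbool.h>
#include <stdio.h>

struct group_model {
        const int *cpus;
        int ncpus;
};

static bool cpu_idle[8] = {
        false, false,   /* group 0: both siblings busy     */
        true,  false,   /* group 1: partially idle -> skip */
        true,  true,    /* group 2: fully idle -> pick it  */
        false, true,
};

static int find_fully_idle_group_cpu(const struct group_model *groups, int ngroups)
{
        for (int g = 0; g < ngroups; g++) {
                bool all_idle = true;

                /* Mirrors: for_each_cpu(i, sched_group_cpus(sg)) ... */
                for (int i = 0; i < groups[g].ncpus; i++)
                        if (!cpu_idle[groups[g].cpus[i]])
                                all_idle = false;

                /* Whole group idle: the first cpu in it will do. */
                if (all_idle)
                        return groups[g].cpus[0];
        }
        return -1;      /* no fully idle group at this level */
}

int main(void)
{
        const int g0[] = {0, 1}, g1[] = {2, 3}, g2[] = {4, 5}, g3[] = {6, 7};
        const struct group_model groups[] = {
                { g0, 2 }, { g1, 2 }, { g2, 2 }, { g3, 2 },
        };

        printf("picked cpu %d\n", find_fully_idle_group_cpu(groups, 4)); /* 4 */
        return 0;
}
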

--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h

@@ -1841,3 +1841,16 @@ static inline void cpufreq_trigger_update(u64 time)
 static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
 static inline void cpufreq_trigger_update(u64 time) {}
 #endif /* CONFIG_CPU_FREQ */
+
+static inline void account_reset_rq(struct rq *rq)
+{
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	rq->prev_irq_time = 0;
+#endif
+#ifdef CONFIG_PARAVIRT
+	rq->prev_steal_time = 0;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	rq->prev_steal_time_rq = 0;
+#endif
+}
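
account_reset_rq() exists because irq and steal time are accounted as unsigned deltas against these per-rq snapshots. If a snapshot survives an offline/online cycle while the underlying counter restarts, the delta can underflow, so CPU_UP_PREPARE zeroes the snapshots first. A toy demonstration with invented numbers (not the kernel's accounting code):

#include <stdio.h>

int main(void)
{
        unsigned long long prev_steal_time = 5000;  /* snapshot from before
                                                       the CPU went offline */
        unsigned long long steal_clock = 200;       /* counter restarted    */

        /* Stale snapshot: unsigned underflow -> absurd steal time. */
        printf("without reset: delta=%llu\n", steal_clock - prev_steal_time);

        /* account_reset_rq()-style reset at CPU_UP_PREPARE: the first
         * delta after replug is sane again. */
        prev_steal_time = 0;
        printf("with reset:    delta=%llu\n", steal_clock - prev_steal_time);
        return 0;
}
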