mirror of https://gitee.com/openkylin/linux.git
sched/deadline: Fix sched_dl_global_validate()
When change sched_rt_{runtime, period}_us, we validate that the new
settings should at least accommodate the currently allocated -dl
bandwidth:
sched_rt_handler()
--> sched_dl_bandwidth_validate()
{
new_bw = global_rt_runtime()/global_rt_period();
for_each_possible_cpu(cpu) {
dl_b = dl_bw_of(cpu);
if (new_bw < dl_b->total_bw) <-------
ret = -EBUSY;
}
}
But under CONFIG_SMP, dl_bw is per root domain , but not per CPU,
dl_b->total_bw is the allocated bandwidth of the whole root domain.
Instead, we should compare dl_b->total_bw against "cpus*new_bw",
where 'cpus' is the number of CPUs of the root domain.
Also, below annotation(in kernel/sched/sched.h) implied implementation
only appeared in SCHED_DEADLINE v2[1], then deadline scheduler kept
evolving till got merged(v9), but the annotation remains unchanged,
meaningless and misleading, update it.
* With respect to SMP, the bandwidth is given on a per-CPU basis,
* meaning that:
* - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU;
* - dl_total_bw array contains, in the i-eth element, the currently
* allocated bandwidth on the i-eth CPU.
[1]: https://lore.kernel.org/lkml/1267385230.13676.101.camel@Palantir/
Fixes: 332ac17ef5
("sched/deadline: Add bandwidth management for SCHED_DEADLINE tasks")
Signed-off-by: Peng Liu <iwtbavbm@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://lkml.kernel.org/r/db6bbda316048cda7a1bbc9571defde193a8d67e.1602171061.git.iwtbavbm@gmail.com
This commit is contained in:
parent
26762423a2
commit
a57415f5d1
|
@ -2561,7 +2561,7 @@ int sched_dl_global_validate(void)
|
|||
u64 new_bw = to_ratio(period, runtime);
|
||||
u64 gen = ++dl_generation;
|
||||
struct dl_bw *dl_b;
|
||||
int cpu, ret = 0;
|
||||
int cpu, cpus, ret = 0;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
|
@ -2576,9 +2576,10 @@ int sched_dl_global_validate(void)
|
|||
goto next;
|
||||
|
||||
dl_b = dl_bw_of(cpu);
|
||||
cpus = dl_bw_cpus(cpu);
|
||||
|
||||
raw_spin_lock_irqsave(&dl_b->lock, flags);
|
||||
if (new_bw < dl_b->total_bw)
|
||||
if (new_bw * cpus < dl_b->total_bw)
|
||||
ret = -EBUSY;
|
||||
raw_spin_unlock_irqrestore(&dl_b->lock, flags);
|
||||
|
||||
|
|
|
@ -257,30 +257,6 @@ struct rt_bandwidth {
|
|||
|
||||
void __dl_clear_params(struct task_struct *p);
|
||||
|
||||
/*
|
||||
* To keep the bandwidth of -deadline tasks and groups under control
|
||||
* we need some place where:
|
||||
* - store the maximum -deadline bandwidth of the system (the group);
|
||||
* - cache the fraction of that bandwidth that is currently allocated.
|
||||
*
|
||||
* This is all done in the data structure below. It is similar to the
|
||||
* one used for RT-throttling (rt_bandwidth), with the main difference
|
||||
* that, since here we are only interested in admission control, we
|
||||
* do not decrease any runtime while the group "executes", neither we
|
||||
* need a timer to replenish it.
|
||||
*
|
||||
* With respect to SMP, the bandwidth is given on a per-CPU basis,
|
||||
* meaning that:
|
||||
* - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU;
|
||||
* - dl_total_bw array contains, in the i-eth element, the currently
|
||||
* allocated bandwidth on the i-eth CPU.
|
||||
* Moreover, groups consume bandwidth on each CPU, while tasks only
|
||||
* consume bandwidth on the CPU they're running on.
|
||||
* Finally, dl_total_bw_cpu is used to cache the index of dl_total_bw
|
||||
* that will be shown the next time the proc or cgroup controls will
|
||||
* be red. It on its turn can be changed by writing on its own
|
||||
* control.
|
||||
*/
|
||||
struct dl_bandwidth {
|
||||
raw_spinlock_t dl_runtime_lock;
|
||||
u64 dl_runtime;
|
||||
|
@ -292,6 +268,24 @@ static inline int dl_bandwidth_enabled(void)
|
|||
return sysctl_sched_rt_runtime >= 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* To keep the bandwidth of -deadline tasks under control
|
||||
* we need some place where:
|
||||
* - store the maximum -deadline bandwidth of each cpu;
|
||||
* - cache the fraction of bandwidth that is currently allocated in
|
||||
* each root domain;
|
||||
*
|
||||
* This is all done in the data structure below. It is similar to the
|
||||
* one used for RT-throttling (rt_bandwidth), with the main difference
|
||||
* that, since here we are only interested in admission control, we
|
||||
* do not decrease any runtime while the group "executes", neither we
|
||||
* need a timer to replenish it.
|
||||
*
|
||||
* With respect to SMP, bandwidth is given on a per root domain basis,
|
||||
* meaning that:
|
||||
* - bw (< 100%) is the deadline bandwidth of each CPU;
|
||||
* - total_bw is the currently allocated bandwidth in each root domain;
|
||||
*/
|
||||
struct dl_bw {
|
||||
raw_spinlock_t lock;
|
||||
u64 bw;
|
||||
|
|
Loading…
Reference in New Issue