mirror of https://gitee.com/openkylin/linux.git
softlockup: make detector be aware of task switch of processes hogging cpu
Currently, the soft lockup detector warns only once for each case of process softlockup. However, the 'watchdog/n' thread may not always get the cpu in the time slot between the task switch of two processes hogging that cpu, so it never gets the chance to reset soft_watchdog_warn. For example, consider two processes hogging the cpu: process A causes the softlockup warning and is killed manually by a user; process B immediately becomes the new process hogging the cpu, preventing the softlockup code from resetting the soft_watchdog_warn variable. This case is a false negative of "warn only once for a process", because a different process may now be hogging the cpu without ever being reported. Resolve this by saving the task pointer of the hogging process and checking it against the current task, using a mismatch to reset soft_watchdog_warn as well. [dzickus@redhat.com: update comment] Signed-off-by: chai wen <chaiw.fnst@cn.fujitsu.com> Signed-off-by: Don Zickus <dzickus@redhat.com> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
f775da2fc2
commit
b1a8de1f53
|
@ -47,6 +47,7 @@ static DEFINE_PER_CPU(bool, softlockup_touch_sync);
|
|||
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
|
||||
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
|
||||
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
|
||||
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
|
||||
#ifdef CONFIG_HARDLOCKUP_DETECTOR
|
||||
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
|
||||
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
|
||||
|
@ -333,8 +334,22 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|||
return HRTIMER_RESTART;
|
||||
|
||||
/* only warn once */
|
||||
if (__this_cpu_read(soft_watchdog_warn) == true)
|
||||
if (__this_cpu_read(soft_watchdog_warn) == true) {
|
||||
/*
|
||||
* When multiple processes are causing softlockups the
|
||||
* softlockup detector only warns on the first one
|
||||
* because the code relies on a full quiet cycle to
|
||||
* re-arm. The second process prevents the quiet cycle
|
||||
* and never gets reported. Use task pointers to detect
|
||||
* this.
|
||||
*/
|
||||
if (__this_cpu_read(softlockup_task_ptr_saved) !=
|
||||
current) {
|
||||
__this_cpu_write(soft_watchdog_warn, false);
|
||||
__touch_watchdog();
|
||||
}
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
|
||||
if (softlockup_all_cpu_backtrace) {
|
||||
/* Prevent multiple soft-lockup reports if one cpu is already
|
||||
|
@ -350,6 +365,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
|||
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
|
||||
smp_processor_id(), duration,
|
||||
current->comm, task_pid_nr(current));
|
||||
__this_cpu_write(softlockup_task_ptr_saved, current);
|
||||
print_modules();
|
||||
print_irqtrace_events(current);
|
||||
if (regs)
|
||||
|
|
Loading…
Reference in New Issue