rcu: Add tracing for rcuo no-CBs CPU wakeup handshake
Lost wakeups from call_rcu() to the rcuo kthreads can result in hangs that are difficult to diagnose. This commit therefore adds tracing to help pin down the cause of these hangs. Reported-by: Clark Williams <williams@redhat.com> Reported-by: Carsten Emde <C.Emde@osadl.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> [ paulmck: Add const per kbuild test robot's advice. ]
This commit is contained in:
parent
bb311eccbd
commit
9261dd0da6
|
@ -171,6 +171,42 @@ TRACE_EVENT(rcu_grace_period_init,
|
|||
__entry->grplo, __entry->grphi, __entry->qsmask)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for RCU no-CBs CPU callback handoffs. This event is intended
|
||||
* to assist debugging of these handoffs.
|
||||
*
|
||||
* The first argument is the name of the RCU flavor, and the second is
|
||||
* the number of the offloaded CPU. The third and final
|
||||
* argument is a string as follows:
|
||||
*
|
||||
* "WakeEmpty": Wake rcuo kthread, first CB to empty list.
|
||||
* "WakeOvf": Wake rcuo kthread, CB list is huge.
|
||||
* "WakeNot": Don't wake rcuo kthread.
|
||||
* "WakeNotPoll": Don't wake rcuo kthread because it is polling.
|
||||
* "WokeEmpty": rcuo kthread woke to find empty list.
|
||||
* "WokeNonEmpty": rcuo kthread woke to find non-empty list.
|
||||
*/
|
||||
TRACE_EVENT(rcu_nocb_wake,
|
||||
|
||||
TP_PROTO(const char *rcuname, int cpu, const char *reason),
|
||||
|
||||
TP_ARGS(rcuname, cpu, reason),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(const char *, rcuname) /* RCU flavor name; only the pointer is recorded */
|
||||
__field(int, cpu) /* number of the offloaded CPU */
|
||||
__field(const char *, reason) /* wakeup-handshake event string; only the pointer is recorded */
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->rcuname = rcuname; /* pointer copy, not a string copy */
|
||||
|
||||
__entry->cpu = cpu;
|
||||
__entry->reason = reason; /* pointer copy, not a string copy */
|
||||
),
|
||||
|
||||
/* Output format: "<rcuname> <cpu> <reason>" */
TP_printk("%s %d %s", __entry->rcuname, __entry->cpu, __entry->reason)
|
||||
);
|
||||
|
||||
/*
|
||||
* Tracepoint for tasks blocking within preemptible-RCU read-side
|
||||
* critical sections. Track the type of RCU (which one day might
|
||||
|
@ -667,6 +703,7 @@ TRACE_EVENT(rcu_barrier,
|
|||
#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
|
||||
level, grplo, grphi, event) \
|
||||
do { } while (0)
|
||||
#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
|
||||
#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
|
||||
#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
|
||||
#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
|
||||
|
|
|
@ -2108,15 +2108,22 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
|
|||
|
||||
/* If we are not being polled and there is a kthread, awaken it ... */
|
||||
t = ACCESS_ONCE(rdp->nocb_kthread);
|
||||
if (rcu_nocb_poll | !t)
|
||||
if (rcu_nocb_poll | !t) {
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("WakeNotPoll"));
|
||||
return;
|
||||
}
|
||||
len = atomic_long_read(&rdp->nocb_q_count);
|
||||
if (old_rhpp == &rdp->nocb_head) {
|
||||
wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
|
||||
rdp->qlen_last_fqs_check = 0;
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty"));
|
||||
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
|
||||
wake_up_process(t); /* ... or if many callbacks queued. */
|
||||
rdp->qlen_last_fqs_check = LONG_MAX / 2;
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf"));
|
||||
} else {
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -2233,10 +2240,15 @@ static int rcu_nocb_kthread(void *arg)
|
|||
wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
|
||||
list = ACCESS_ONCE(rdp->nocb_head);
|
||||
if (!list) {
|
||||
if (!rcu_nocb_poll)
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("WokeEmpty"));
|
||||
schedule_timeout_interruptible(1);
|
||||
flush_signals(current);
|
||||
continue;
|
||||
}
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
TPS("WokeNonEmpty"));
|
||||
|
||||
/*
|
||||
* Extract queued callbacks, update counts, and wait
|
||||
|
|
Loading…
Reference in New Issue