rcu: Add fastpath bypassing funnel locking

In the common case, there will be only one expedited grace period in
the system at a given time, in which case it is not helpful to use
funnel locking.  This commit therefore adds a fastpath that bypasses
funnel locking when the root ->exp_funnel_mutex is not held.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
Paul E. McKenney 2015-07-11 16:24:45 -07:00
parent 32bb1c7999
commit cdacbe1f91
4 changed files with 29 additions and 29 deletions

View File

@ -237,42 +237,26 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
s=21872 d=21872 w=0 tf=0 wd1=0 wd2=0 n=0 sc=21872 dt=21872 dl=0 dx=21872
s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
These fields are as follows:
o "s" is the starting sequence number.
o "s" is the sequence number, with an odd number indicating that
an expedited grace period is in progress.
o "d" is the ending sequence number. When the starting and ending
numbers differ, there is an expedited grace period in progress.
o "w" is the number of times that the sequence numbers have been
in danger of wrapping.
o "tf" is the number of times that contention has resulted in a
failure to begin an expedited grace period.
o "wd1" and "wd2" are the number of times that an attempt to
start an expedited grace period found that someone else had
completed an expedited grace period that satisfies the
o "wd0", "wd1", "wd2", and "wd3" are the number of times that an
attempt to start an expedited grace period found that someone
else had completed an expedited grace period that satisfies the
attempted request. "Our work is done."
o "n" is number of times that contention was so great that
the request was demoted from an expedited grace period to
a normal grace period.
o "n" is the number of times that a concurrent CPU-hotplug operation
forced a fallback to a normal grace period.
o "enq" is the number of quiescent states still outstanding.
o "sc" is the number of times that the attempt to start a
new expedited grace period succeeded.
o "dt" is the number of times that we attempted to update
the "d" counter.
o "dl" is the number of times that we failed to update the "d"
counter.
o "dx" is the number of times that we succeeded in updating
the "d" counter.
The output of "cat rcu/rcu_preempt/rcugp" looks as follows:

View File

@ -3355,6 +3355,22 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
struct rcu_node *rnp0;
struct rcu_node *rnp1 = NULL;
/*
* First try directly acquiring the root lock in order to reduce
* latency in the common case where expedited grace periods are
* rare. We check mutex_is_locked() to avoid pathological levels of
* memory contention on ->exp_funnel_mutex in the heavy-load case.
*/
rnp0 = rcu_get_root(rsp);
if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
if (sync_exp_work_done(rsp, rnp0, NULL,
&rsp->expedited_workdone0, s))
return NULL;
return rnp0;
}
}
/*
* Each pass through the following loop works its way
* up the rcu_node tree, returning if others have done the

View File

@ -493,7 +493,7 @@ struct rcu_state {
/* End of fields guarded by barrier_mutex. */
unsigned long expedited_sequence; /* Take a ticket. */
atomic_long_t expedited_tryfail; /* # acquisition failures. */
atomic_long_t expedited_workdone0; /* # done by others #0. */
atomic_long_t expedited_workdone1; /* # done by others #1. */
atomic_long_t expedited_workdone2; /* # done by others #2. */
atomic_long_t expedited_workdone3; /* # done by others #3. */

View File

@ -185,9 +185,9 @@ static int show_rcuexp(struct seq_file *m, void *v)
{
struct rcu_state *rsp = (struct rcu_state *)m->private;
seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
rsp->expedited_sequence,
atomic_long_read(&rsp->expedited_tryfail),
atomic_long_read(&rsp->expedited_workdone0),
atomic_long_read(&rsp->expedited_workdone1),
atomic_long_read(&rsp->expedited_workdone2),
atomic_long_read(&rsp->expedited_workdone3),