locking/rwsem: Scan the wait_list for readers only once

When wanting to wake up readers, __rwsem_mark_wake() currently
iterates the wait_list twice while looking to wake up the first N
queued reader-tasks. While this can be quite inefficient, it was
there such that an awoken reader would be first and foremost
acknowledged by the lock counter.

Keeping the same logic, we can further benefit from the use of
wake_qs and avoid entirely the first wait_list iteration that sets
the counter as wake_up_process() isn't going to occur right away,
and therefore we maintain the counter->list order of going about
things.

Other than saving cycles with O(n) "scanning", this change also
nicely cleans up a good chunk of __rwsem_mark_wake(), making it
both visually cleaner and less tedious to read.

For example, the following improvements were seen on some
will-it-scale microbenchmarks, on a 48-core Haswell:

                                       v4.7              v4.7-rwsem-v1
  Hmean    signal1-processes-8    5792691.42 (  0.00%)  5771971.04 ( -0.36%)
  Hmean    signal1-processes-12   6081199.96 (  0.00%)  6072174.38 ( -0.15%)
  Hmean    signal1-processes-21   3071137.71 (  0.00%)  3041336.72 ( -0.97%)
  Hmean    signal1-processes-48   3712039.98 (  0.00%)  3708113.59 ( -0.11%)
  Hmean    signal1-processes-79   4464573.45 (  0.00%)  4682798.66 (  4.89%)
  Hmean    signal1-processes-110  4486842.01 (  0.00%)  4633781.71 (  3.27%)
  Hmean    signal1-processes-141  4611816.83 (  0.00%)  4692725.38 (  1.75%)
  Hmean    signal1-processes-172  4638157.05 (  0.00%)  4714387.86 (  1.64%)
  Hmean    signal1-processes-203  4465077.80 (  0.00%)  4690348.07 (  5.05%)
  Hmean    signal1-processes-224  4410433.74 (  0.00%)  4687534.43 (  6.28%)

  Stddev   signal1-processes-8       6360.47 (  0.00%)     8455.31 ( 32.94%)
  Stddev   signal1-processes-12      4004.98 (  0.00%)     9156.13 (128.62%)
  Stddev   signal1-processes-21      3273.14 (  0.00%)     5016.80 ( 53.27%)
  Stddev   signal1-processes-48     28420.25 (  0.00%)    26576.22 ( -6.49%)
  Stddev   signal1-processes-79     22038.34 (  0.00%)    18992.70 (-13.82%)
  Stddev   signal1-processes-110    23226.93 (  0.00%)    17245.79 (-25.75%)
  Stddev   signal1-processes-141     6358.98 (  0.00%)     7636.14 ( 20.08%)
  Stddev   signal1-processes-172     9523.70 (  0.00%)     4824.75 (-49.34%)
  Stddev   signal1-processes-203    13915.33 (  0.00%)     9326.33 (-32.98%)
  Stddev   signal1-processes-224    15573.94 (  0.00%)    10613.82 (-31.85%)

Other runs that saw improvements include context_switch and pipe; and
as expected, this is particularly highlighted on larger thread counts
as it becomes more expensive to walk the list twice.

No change in wakeup ordering or semantics.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman.Long@hp.com
Cc: dave@stgolabs.net
Cc: jason.low2@hpe.com
Cc: wanpeng.li@hotmail.com
Link: http://lkml.kernel.org/r/1470384285-32163-4-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Davidlohr Bueso 2016-08-05 01:04:45 -07:00 committed by Ingo Molnar
parent c2867bbaf5
commit 70800c3c0c
1 changed files with 26 additions and 32 deletions

View File

@ -125,12 +125,14 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
enum rwsem_wake_type wake_type, enum rwsem_wake_type wake_type,
struct wake_q_head *wake_q) struct wake_q_head *wake_q)
{ {
struct rwsem_waiter *waiter; struct rwsem_waiter *waiter, *tmp;
struct task_struct *tsk; long oldcount, woken = 0, adjustment = 0;
struct list_head *next;
long loop, oldcount, woken = 0, adjustment = 0;
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); /*
* Take a peek at the queue head waiter such that we can determine
* the wakeup(s) to perform.
*/
waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
if (waiter->type == RWSEM_WAITING_FOR_WRITE) { if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wake_type == RWSEM_WAKE_ANY) { if (wake_type == RWSEM_WAKE_ANY) {
@ -180,36 +182,21 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
/* /*
* Grant an infinite number of read locks to the readers at the front * Grant an infinite number of read locks to the readers at the front
* of the queue. Note we increment the 'active part' of the count by * of the queue. We know that woken will be at least 1 as we accounted
* the number of readers before waking any processes up. * for above. Note we increment the 'active part' of the count by the
* number of readers before waking any processes up.
*/ */
do { list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
woken++; struct task_struct *tsk;
if (waiter->list.next == &sem->wait_list) if (waiter->type == RWSEM_WAITING_FOR_WRITE)
break; break;
waiter = list_entry(waiter->list.next, woken++;
struct rwsem_waiter, list);
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
if (waiter->type != RWSEM_WAITING_FOR_WRITE)
/* hit end of list above */
adjustment -= RWSEM_WAITING_BIAS;
if (adjustment)
atomic_long_add(adjustment, &sem->count);
next = sem->wait_list.next;
loop = woken;
do {
waiter = list_entry(next, struct rwsem_waiter, list);
next = waiter->list.next;
tsk = waiter->task; tsk = waiter->task;
wake_q_add(wake_q, tsk); wake_q_add(wake_q, tsk);
list_del(&waiter->list);
/* /*
* Ensure that the last operation is setting the reader * Ensure that the last operation is setting the reader
* waiter to nil such that rwsem_down_read_failed() cannot * waiter to nil such that rwsem_down_read_failed() cannot
@ -217,10 +204,16 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
* to the task to wakeup. * to the task to wakeup.
*/ */
smp_store_release(&waiter->task, NULL); smp_store_release(&waiter->task, NULL);
} while (--loop); }
sem->wait_list.next = next; adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
next->prev = &sem->wait_list; if (list_empty(&sem->wait_list)) {
/* hit end of list above */
adjustment -= RWSEM_WAITING_BIAS;
}
if (adjustment)
atomic_long_add(adjustment, &sem->count);
} }
/* /*
@ -245,7 +238,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
/* we're now waiting on the lock, but no longer actively locking */ /* we're now waiting on the lock, but no longer actively locking */
count = atomic_long_add_return(adjustment, &sem->count); count = atomic_long_add_return(adjustment, &sem->count);
/* If there are no active locks, wake the front queued process(es). /*
* If there are no active locks, wake the front queued process(es).
* *
* If there are no writers and we are first in the queue, * If there are no writers and we are first in the queue,
* wake our own waiter to join the existing active readers ! * wake our own waiter to join the existing active readers !