locking/rwsem: Scan the wait_list for readers only once
When wanting to wake up readers, __rwsem_mark_wake() currently iterates the wait_list twice while looking to wake up the first N queued reader-tasks. While this can be quite inefficient, it was there such that an awoken reader would be first and foremost acknowledged by the lock counter. Keeping the same logic, we can further benefit from the use of wake_qs and avoid entirely the first wait_list iteration that sets the counter as wake_up_process() isn't going to occur right away, and therefore we maintain the counter->list order of going about things. Other than saving cycles with O(n) "scanning", this change also nicely cleans up a good chunk of __rwsem_mark_wake(); it is both visually cleaner and less tedious to read. For example, the following improvements were seen on some will-it-scale microbenchmarks, on a 48-core Haswell: v4.7 v4.7-rwsem-v1 Hmean signal1-processes-8 5792691.42 ( 0.00%) 5771971.04 ( -0.36%) Hmean signal1-processes-12 6081199.96 ( 0.00%) 6072174.38 ( -0.15%) Hmean signal1-processes-21 3071137.71 ( 0.00%) 3041336.72 ( -0.97%) Hmean signal1-processes-48 3712039.98 ( 0.00%) 3708113.59 ( -0.11%) Hmean signal1-processes-79 4464573.45 ( 0.00%) 4682798.66 ( 4.89%) Hmean signal1-processes-110 4486842.01 ( 0.00%) 4633781.71 ( 3.27%) Hmean signal1-processes-141 4611816.83 ( 0.00%) 4692725.38 ( 1.75%) Hmean signal1-processes-172 4638157.05 ( 0.00%) 4714387.86 ( 1.64%) Hmean signal1-processes-203 4465077.80 ( 0.00%) 4690348.07 ( 5.05%) Hmean signal1-processes-224 4410433.74 ( 0.00%) 4687534.43 ( 6.28%) Stddev signal1-processes-8 6360.47 ( 0.00%) 8455.31 ( 32.94%) Stddev signal1-processes-12 4004.98 ( 0.00%) 9156.13 (128.62%) Stddev signal1-processes-21 3273.14 ( 0.00%) 5016.80 ( 53.27%) Stddev signal1-processes-48 28420.25 ( 0.00%) 26576.22 ( -6.49%) Stddev signal1-processes-79 22038.34 ( 0.00%) 18992.70 (-13.82%) Stddev signal1-processes-110 23226.93 ( 0.00%) 17245.79 (-25.75%) Stddev signal1-processes-141 6358.98 ( 0.00%) 7636.14 ( 20.08%) Stddev 
signal1-processes-172 9523.70 ( 0.00%) 4824.75 (-49.34%) Stddev signal1-processes-203 13915.33 ( 0.00%) 9326.33 (-32.98%) Stddev signal1-processes-224 15573.94 ( 0.00%) 10613.82 (-31.85%) Other runs that saw improvements include context_switch and pipe; and as expected, this is particularly highlighted on larger thread counts as it becomes more expensive to walk the list twice. No change in wakeup ordering or semantics. Signed-off-by: Davidlohr Bueso <dbueso@suse.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Waiman.Long@hp.com Cc: dave@stgolabs.net Cc: jason.low2@hpe.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1470384285-32163-4-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
c2867bbaf5
commit
70800c3c0c
|
@ -125,12 +125,14 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
|
||||||
enum rwsem_wake_type wake_type,
|
enum rwsem_wake_type wake_type,
|
||||||
struct wake_q_head *wake_q)
|
struct wake_q_head *wake_q)
|
||||||
{
|
{
|
||||||
struct rwsem_waiter *waiter;
|
struct rwsem_waiter *waiter, *tmp;
|
||||||
struct task_struct *tsk;
|
long oldcount, woken = 0, adjustment = 0;
|
||||||
struct list_head *next;
|
|
||||||
long loop, oldcount, woken = 0, adjustment = 0;
|
|
||||||
|
|
||||||
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
|
/*
|
||||||
|
* Take a peek at the queue head waiter such that we can determine
|
||||||
|
* the wakeup(s) to perform.
|
||||||
|
*/
|
||||||
|
waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
|
||||||
|
|
||||||
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
|
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
|
||||||
if (wake_type == RWSEM_WAKE_ANY) {
|
if (wake_type == RWSEM_WAKE_ANY) {
|
||||||
|
@ -180,36 +182,21 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Grant an infinite number of read locks to the readers at the front
|
* Grant an infinite number of read locks to the readers at the front
|
||||||
* of the queue. Note we increment the 'active part' of the count by
|
* of the queue. We know that woken will be at least 1 as we accounted
|
||||||
* the number of readers before waking any processes up.
|
* for above. Note we increment the 'active part' of the count by the
|
||||||
|
* number of readers before waking any processes up.
|
||||||
*/
|
*/
|
||||||
do {
|
list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
|
||||||
woken++;
|
struct task_struct *tsk;
|
||||||
|
|
||||||
if (waiter->list.next == &sem->wait_list)
|
if (waiter->type == RWSEM_WAITING_FOR_WRITE)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
waiter = list_entry(waiter->list.next,
|
woken++;
|
||||||
struct rwsem_waiter, list);
|
|
||||||
|
|
||||||
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
|
|
||||||
|
|
||||||
adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
|
|
||||||
if (waiter->type != RWSEM_WAITING_FOR_WRITE)
|
|
||||||
/* hit end of list above */
|
|
||||||
adjustment -= RWSEM_WAITING_BIAS;
|
|
||||||
|
|
||||||
if (adjustment)
|
|
||||||
atomic_long_add(adjustment, &sem->count);
|
|
||||||
|
|
||||||
next = sem->wait_list.next;
|
|
||||||
loop = woken;
|
|
||||||
do {
|
|
||||||
waiter = list_entry(next, struct rwsem_waiter, list);
|
|
||||||
next = waiter->list.next;
|
|
||||||
tsk = waiter->task;
|
tsk = waiter->task;
|
||||||
|
|
||||||
wake_q_add(wake_q, tsk);
|
wake_q_add(wake_q, tsk);
|
||||||
|
list_del(&waiter->list);
|
||||||
/*
|
/*
|
||||||
* Ensure that the last operation is setting the reader
|
* Ensure that the last operation is setting the reader
|
||||||
* waiter to nil such that rwsem_down_read_failed() cannot
|
* waiter to nil such that rwsem_down_read_failed() cannot
|
||||||
|
@ -217,10 +204,16 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
|
||||||
* to the task to wakeup.
|
* to the task to wakeup.
|
||||||
*/
|
*/
|
||||||
smp_store_release(&waiter->task, NULL);
|
smp_store_release(&waiter->task, NULL);
|
||||||
} while (--loop);
|
}
|
||||||
|
|
||||||
sem->wait_list.next = next;
|
adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
|
||||||
next->prev = &sem->wait_list;
|
if (list_empty(&sem->wait_list)) {
|
||||||
|
/* hit end of list above */
|
||||||
|
adjustment -= RWSEM_WAITING_BIAS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (adjustment)
|
||||||
|
atomic_long_add(adjustment, &sem->count);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -245,7 +238,8 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
|
||||||
/* we're now waiting on the lock, but no longer actively locking */
|
/* we're now waiting on the lock, but no longer actively locking */
|
||||||
count = atomic_long_add_return(adjustment, &sem->count);
|
count = atomic_long_add_return(adjustment, &sem->count);
|
||||||
|
|
||||||
/* If there are no active locks, wake the front queued process(es).
|
/*
|
||||||
|
* If there are no active locks, wake the front queued process(es).
|
||||||
*
|
*
|
||||||
* If there are no writers and we are first in the queue,
|
* If there are no writers and we are first in the queue,
|
||||||
* wake our own waiter to join the existing active readers !
|
* wake our own waiter to join the existing active readers !
|
||||||
|
|
Loading…
Reference in New Issue