mirror of https://gitee.com/openkylin/linux.git
Merge branch 'for-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue fixes from Tejun Heo: "It's later than I'd like but well the timing just didn't work out this time. There are three bug fixes. One from before 3.6-rc1 and two from the new CPU hotplug code. Kudos to Lai for discovering all of them and providing fixes. * Atomicity bug when clearing a flag and setting another. The two operation should have been atomic but wasn't. This bug has existed for a long time but is unlikely to have actually happened. Fix is safe. Marked for -stable. * If CPU hotplug cycles happen back-to-back before workers finish the previous cycle, the states could get out of sync and it could get stuck. Fixed by waiting for workers to complete before finishing hotplug cycle. * While CPU hotplug is in progress, idle workers could be depleted which can then lead to deadlock. I think both happening together is highly unlikely but still better to fix it and the fix isn't too scary. There's another workqueue related regression which reported a few days ago: https://bugzilla.kernel.org/show_bug.cgi?id=47301 It's a bit of head scratcher but there is a semi-reliable reproduce case, so I'm hoping to resolve it soonish." * 'for-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq: workqueue: fix possible idle worker depletion across CPU hotplug workqueue: restore POOL_MANAGING_WORKERS workqueue: fix possible deadlock in idle worker rebinding workqueue: move WORKER_REBIND clearing in rebind_workers() to the end of the function workqueue: UNBOUND -> REBIND morphing in rebind_workers() should be atomic
This commit is contained in:
commit
0bd1189e23
|
@ -66,6 +66,7 @@ enum {
|
|||
|
||||
/* pool flags */
|
||||
POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
|
||||
POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */
|
||||
|
||||
/* worker flags */
|
||||
WORKER_STARTED = 1 << 0, /* started */
|
||||
|
@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
|
|||
/* Do we have too many workers and should some go away? */
|
||||
static bool too_many_workers(struct worker_pool *pool)
|
||||
{
|
||||
bool managing = mutex_is_locked(&pool->manager_mutex);
|
||||
bool managing = pool->flags & POOL_MANAGING_WORKERS;
|
||||
int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
|
||||
int nr_busy = pool->nr_workers - nr_idle;
|
||||
|
||||
|
@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker)
|
|||
|
||||
/* we did our part, wait for rebind_workers() to finish up */
|
||||
wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
|
||||
|
||||
/*
|
||||
* rebind_workers() shouldn't finish until all workers passed the
|
||||
* above WORKER_REBIND wait. Tell it when done.
|
||||
*/
|
||||
spin_lock_irq(&worker->pool->gcwq->lock);
|
||||
if (!--worker->idle_rebind->cnt)
|
||||
complete(&worker->idle_rebind->done);
|
||||
spin_unlock_irq(&worker->pool->gcwq->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1396,12 +1406,15 @@ static void rebind_workers(struct global_cwq *gcwq)
|
|||
/* set REBIND and kick idle ones, we'll wait for these later */
|
||||
for_each_worker_pool(pool, gcwq) {
|
||||
list_for_each_entry(worker, &pool->idle_list, entry) {
|
||||
unsigned long worker_flags = worker->flags;
|
||||
|
||||
if (worker->flags & WORKER_REBIND)
|
||||
continue;
|
||||
|
||||
/* morph UNBOUND to REBIND */
|
||||
worker->flags &= ~WORKER_UNBOUND;
|
||||
worker->flags |= WORKER_REBIND;
|
||||
/* morph UNBOUND to REBIND atomically */
|
||||
worker_flags &= ~WORKER_UNBOUND;
|
||||
worker_flags |= WORKER_REBIND;
|
||||
ACCESS_ONCE(worker->flags) = worker_flags;
|
||||
|
||||
idle_rebind.cnt++;
|
||||
worker->idle_rebind = &idle_rebind;
|
||||
|
@ -1419,25 +1432,15 @@ static void rebind_workers(struct global_cwq *gcwq)
|
|||
goto retry;
|
||||
}
|
||||
|
||||
/*
|
||||
* All idle workers are rebound and waiting for %WORKER_REBIND to
|
||||
* be cleared inside idle_worker_rebind(). Clear and release.
|
||||
* Clearing %WORKER_REBIND from this foreign context is safe
|
||||
* because these workers are still guaranteed to be idle.
|
||||
*/
|
||||
for_each_worker_pool(pool, gcwq)
|
||||
list_for_each_entry(worker, &pool->idle_list, entry)
|
||||
worker->flags &= ~WORKER_REBIND;
|
||||
|
||||
wake_up_all(&gcwq->rebind_hold);
|
||||
|
||||
/* rebind busy workers */
|
||||
/* all idle workers are rebound, rebind busy workers */
|
||||
for_each_busy_worker(worker, i, pos, gcwq) {
|
||||
struct work_struct *rebind_work = &worker->rebind_work;
|
||||
unsigned long worker_flags = worker->flags;
|
||||
|
||||
/* morph UNBOUND to REBIND */
|
||||
worker->flags &= ~WORKER_UNBOUND;
|
||||
worker->flags |= WORKER_REBIND;
|
||||
/* morph UNBOUND to REBIND atomically */
|
||||
worker_flags &= ~WORKER_UNBOUND;
|
||||
worker_flags |= WORKER_REBIND;
|
||||
ACCESS_ONCE(worker->flags) = worker_flags;
|
||||
|
||||
if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
|
||||
work_data_bits(rebind_work)))
|
||||
|
@ -1449,6 +1452,34 @@ static void rebind_workers(struct global_cwq *gcwq)
|
|||
worker->scheduled.next,
|
||||
work_color_to_flags(WORK_NO_COLOR));
|
||||
}
|
||||
|
||||
/*
|
||||
* All idle workers are rebound and waiting for %WORKER_REBIND to
|
||||
* be cleared inside idle_worker_rebind(). Clear and release.
|
||||
* Clearing %WORKER_REBIND from this foreign context is safe
|
||||
* because these workers are still guaranteed to be idle.
|
||||
*
|
||||
* We need to make sure all idle workers passed WORKER_REBIND wait
|
||||
* in idle_worker_rebind() before returning; otherwise, workers can
|
||||
* get stuck at the wait if hotplug cycle repeats.
|
||||
*/
|
||||
idle_rebind.cnt = 1;
|
||||
INIT_COMPLETION(idle_rebind.done);
|
||||
|
||||
for_each_worker_pool(pool, gcwq) {
|
||||
list_for_each_entry(worker, &pool->idle_list, entry) {
|
||||
worker->flags &= ~WORKER_REBIND;
|
||||
idle_rebind.cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
wake_up_all(&gcwq->rebind_hold);
|
||||
|
||||
if (--idle_rebind.cnt) {
|
||||
spin_unlock_irq(&gcwq->lock);
|
||||
wait_for_completion(&idle_rebind.done);
|
||||
spin_lock_irq(&gcwq->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static struct worker *alloc_worker(void)
|
||||
|
@ -1794,9 +1825,45 @@ static bool manage_workers(struct worker *worker)
|
|||
struct worker_pool *pool = worker->pool;
|
||||
bool ret = false;
|
||||
|
||||
if (!mutex_trylock(&pool->manager_mutex))
|
||||
if (pool->flags & POOL_MANAGING_WORKERS)
|
||||
return ret;
|
||||
|
||||
pool->flags |= POOL_MANAGING_WORKERS;
|
||||
|
||||
/*
|
||||
* To simplify both worker management and CPU hotplug, hold off
|
||||
* management while hotplug is in progress. CPU hotplug path can't
|
||||
* grab %POOL_MANAGING_WORKERS to achieve this because that can
|
||||
* lead to idle worker depletion (all become busy thinking someone
|
||||
* else is managing) which in turn can result in deadlock under
|
||||
* extreme circumstances. Use @pool->manager_mutex to synchronize
|
||||
* manager against CPU hotplug.
|
||||
*
|
||||
* manager_mutex would always be free unless CPU hotplug is in
|
||||
* progress. trylock first without dropping @gcwq->lock.
|
||||
*/
|
||||
if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
|
||||
spin_unlock_irq(&pool->gcwq->lock);
|
||||
mutex_lock(&pool->manager_mutex);
|
||||
/*
|
||||
* CPU hotplug could have happened while we were waiting
|
||||
* for manager_mutex. Hotplug itself can't handle us
|
||||
* because manager isn't either on idle or busy list, and
|
||||
* @gcwq's state and ours could have deviated.
|
||||
*
|
||||
* As hotplug is now excluded via manager_mutex, we can
|
||||
* simply try to bind. It will succeed or fail depending
|
||||
* on @gcwq's current state. Try it and adjust
|
||||
* %WORKER_UNBOUND accordingly.
|
||||
*/
|
||||
if (worker_maybe_bind_and_lock(worker))
|
||||
worker->flags &= ~WORKER_UNBOUND;
|
||||
else
|
||||
worker->flags |= WORKER_UNBOUND;
|
||||
|
||||
ret = true;
|
||||
}
|
||||
|
||||
pool->flags &= ~POOL_MANAGE_WORKERS;
|
||||
|
||||
/*
|
||||
|
@ -1806,6 +1873,7 @@ static bool manage_workers(struct worker *worker)
|
|||
ret |= maybe_destroy_workers(pool);
|
||||
ret |= maybe_create_worker(pool);
|
||||
|
||||
pool->flags &= ~POOL_MANAGING_WORKERS;
|
||||
mutex_unlock(&pool->manager_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue