mirror of https://gitee.com/openkylin/linux.git
for-5.15/io_uring-2021-09-04
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmEz5eEQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpmk1D/wML8Im2erR5s0PaWZgYxXlgEKrJDwJm/p+
2Uixrn/9kQAhwH+0kJnCiI+HwlL3LU+5/iAdeGtdYMcVaotPPmm5V3jfud8+RuAi
E+uIOdULXgQKj8pkiQ2h5mvYd0BxGkGH38gUqilSwFrY2HTpbfxreCHhYoQaE/7o
DiGNgbhJglSFIBuIgS4cfpLkI3FdaAmrCydZ9zaqEv/G/bx9aA9lwSbAJadhTbmt
Qc1vvbh2FB9YvgZX8qfaneyDKzQbwqTvKxCe2SOVMOp/X0feJym7WZUvrPr04EoZ
zBaLDkmn44re4iWPbide7+KQJ8NMQQDBiuxwF5WxdF3hrcsiwqmKgDtBEGWXFMeV
CUZ9Osrfb480UKsDExtxLhQqGz1JZqIPZdtDvSJb8MunPZtvTz27NNFyyb9aBrlX
WiwEHqAOE1W33buPCNyuYLGDVYis4/TkwF0NZpMwsyPdN0Iz/M8Z5F5BHhC7BYoP
U8KMsX3XvddxB113U+IMVqI/SuvT125U65brklQlQeLEHnH57ceII9mNGfNic6LR
bcIu7Fb5J1U5nAMeeLCSXsEYXs+peYgI1UOWXaWgSVixUAyU8H+OqsBVIl8eiMjr
TTbdIMmfWqENE3wBM709FQQLoMmGl1YjBkGmBXKZjNHcDrf9X56rimSxRD2i2okg
r2JczxQ5uQ==
=QoQg
-----END PGP SIGNATURE-----

Merge tag 'for-5.15/io_uring-2021-09-04' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "As sometimes happens, two reports came in around the merge window open
  that led to some fixes. Hence this one is a bit bigger than usual
  followup fixes, but most of it will be going towards stable, outside
  of the fixes that are addressing regressions from this merge window.

  In detail:

   - postgres is a heavy user of signals between tasks, and if we're
     unlucky this can interfere with io-wq worker creation. Make sure
     we're resilient against unrelated signal handling. This set of
     changes also includes hardening against allocation failures, which
     could previously had led to stalls.

   - Some use cases that end up having a mix of bounded and unbounded
     work would have starvation issues related to that. Split the
     pending work lists to handle that better.

   - Completion trace int -> unsigned -> long fix

   - Fix issue with REGISTER_IOWQ_MAX_WORKERS and SQPOLL

   - Fix regression with hash wait lock in this merge window

   - Fix retry issued on block devices (Ming)

   - Fix regression with links in this merge window (Pavel)

   - Fix race with multi-shot poll and completions (Xiaoguang)

   - Ensure regular file IO doesn't inadvertently skip completion
     batching (Pavel)

   - Ensure submissions are flushed after running task_work (Pavel)"

* tag 'for-5.15/io_uring-2021-09-04' of git://git.kernel.dk/linux-block:
  io_uring: io_uring_complete() trace should take an integer
  io_uring: fix possible poll event lost in multi shot mode
  io_uring: prolong tctx_task_work() with flushing
  io_uring: don't disable kiocb_done() CQE batching
  io_uring: ensure IORING_REGISTER_IOWQ_MAX_WORKERS works with SQPOLL
  io-wq: make worker creation resilient against signals
  io-wq: get rid of FIXED worker flag
  io-wq: only exit on fatal signals
  io-wq: split bounded and unbounded work into separate lists
  io-wq: fix queue stalling race
  io_uring: don't submit half-prepared drain request
  io_uring: fix queueing half-created requests
  io-wq: ensure that hash wait lock is IRQ disabling
  io_uring: retry in case of short read on block device
  io_uring: IORING_OP_WRITE needs hash_reg_file set
  io-wq: fix race between adding work and activating a free worker
commit 60f8fbaa95

 fs/io-wq.c | 438
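One of the fixes above, "ensure IORING_REGISTER_IOWQ_MAX_WORKERS works with SQPOLL", touches the io_register_iowq_max_workers() path shown in the fs/io_uring.c hunks below. The following is not part of the commit: a minimal userspace sketch of that interface, assuming a 5.15-era kernel and uapi headers that define IORING_REGISTER_IOWQ_MAX_WORKERS. The two-entry array is {bounded, unbounded}; a zero entry leaves that limit untouched, and the previous limits are copied back to the array on success.

/*
 * Hedged sketch, not the kernel's code: set the io-wq worker limits for a
 * ring via the raw syscalls. Assumes <sys/syscall.h> provides
 * __NR_io_uring_setup/__NR_io_uring_register on this libc.
 */
#include <linux/io_uring.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct io_uring_params p;
	unsigned int counts[2] = { 8, 64 };	/* [0] = bounded, [1] = unbounded */
	int ring_fd;

	memset(&p, 0, sizeof(p));
	ring_fd = syscall(__NR_io_uring_setup, 4, &p);
	if (ring_fd < 0) {
		perror("io_uring_setup");
		return 1;
	}

	/*
	 * nr_args is the number of u32 entries, i.e. 2. Depending on kernel
	 * version the io-wq may not exist until the ring has been used, in
	 * which case this fails with EINVAL.
	 */
	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_IOWQ_MAX_WORKERS, counts, 2) < 0) {
		perror("io_uring_register");
		return 1;
	}

	printf("previous limits: bounded %u, unbounded %u\n",
	       counts[0], counts[1]);
	close(ring_fd);
	return 0;
}

With IORING_SETUP_SQPOLL rings the limits are applied to the SQPOLL thread's io-wq rather than the caller's, which is what the sqd->thread->io_uring handling in the fix below arranges.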
fs/io-wq.c

@@ -23,8 +23,7 @@ enum {
 	IO_WORKER_F_UP		= 1,	/* up and active */
 	IO_WORKER_F_RUNNING	= 2,	/* account as running */
 	IO_WORKER_F_FREE	= 4,	/* worker on free list */
-	IO_WORKER_F_FIXED	= 8,	/* static idle worker */
-	IO_WORKER_F_BOUND	= 16,	/* is doing bounded work */
+	IO_WORKER_F_BOUND	= 8,	/* is doing bounded work */
 };

@@ -32,7 +31,7 @@ enum {
 enum {
-	IO_WQE_FLAG_STALLED	= 1,	/* stalled on hash */
+	IO_ACCT_STALLED_BIT	= 0,	/* stalled on hash */
 };

@@ -55,7 +54,10 @@ struct io_worker {
 	struct callback_head create_work;
 	int create_index;

-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		struct work_struct work;
+	};
 };

@@ -71,25 +73,24 @@ struct io_wqe_acct {
 	unsigned max_workers;
 	int index;
 	atomic_t nr_running;
+	struct io_wq_work_list work_list;
+	unsigned long flags;
 };

 enum {
 	IO_WQ_ACCT_BOUND,
 	IO_WQ_ACCT_UNBOUND,
+	IO_WQ_ACCT_NR,
 };

 /*
  * Per-node worker thread pool
  */
 struct io_wqe {
-	struct {
-		raw_spinlock_t lock;
-		struct io_wq_work_list work_list;
-		unsigned flags;
-	} ____cacheline_aligned_in_smp;
+	raw_spinlock_t lock;
+	struct io_wqe_acct acct[2];

 	int node;

-	struct io_wqe_acct acct[2];
-
 	struct hlist_nulls_head free_list;
 	struct list_head all_list;

@@ -133,8 +134,11 @@ struct io_cb_cancel_data {
 	bool cancel_all;
 };

-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first);
+static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
+static void io_wqe_dec_running(struct io_worker *worker);
+static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
+					struct io_wqe_acct *acct,
+					struct io_cb_cancel_data *match);

 static bool io_worker_get(struct io_worker *worker)
 {

@@ -195,11 +199,10 @@ static void io_worker_exit(struct io_worker *worker)
-static inline bool io_wqe_run_queue(struct io_wqe *wqe)
-	__must_hold(wqe->lock)
+static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
 {
-	if (!wq_list_empty(&wqe->work_list) &&
-	    !(wqe->flags & IO_WQE_FLAG_STALLED))
+	if (!wq_list_empty(&acct->work_list) &&
+	    !test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
 		return true;
 	return false;
 }

@@ -208,7 +211,8 @@ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
  * Check head of free list for an available worker. If one isn't available,
  * caller must create one.
  */
-static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
+static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
+					struct io_wqe_acct *acct)
 	__must_hold(RCU)
 {
 	struct hlist_nulls_node *n;

@@ -222,6 +226,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
 	hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
 		if (!io_worker_get(worker))
 			continue;
+		if (io_wqe_get_acct(worker) != acct) {
+			io_worker_release(worker);
+			continue;
+		}
 		if (wake_up_process(worker->task)) {
 			io_worker_release(worker);
 			return true;

@@ -236,9 +244,9 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
  * We need a worker. If we find a free one, we're good. If not, and we're
  * below the max number of workers, create one.
  */
-static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
+static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
 {
-	bool ret;
+	bool do_create = false;

@@ -247,27 +255,19 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
 	if (unlikely(!acct->max_workers))
 		pr_warn_once("io-wq is not configured for unbound workers");

-	rcu_read_lock();
-	ret = io_wqe_activate_free_worker(wqe);
-	rcu_read_unlock();
-
-	if (!ret) {
-		bool do_create = false, first = false;
-
-		raw_spin_lock(&wqe->lock);
-		if (acct->nr_workers < acct->max_workers) {
-			if (!acct->nr_workers)
-				first = true;
-			acct->nr_workers++;
-			do_create = true;
-		}
-		raw_spin_unlock(&wqe->lock);
-		if (do_create) {
-			atomic_inc(&acct->nr_running);
-			atomic_inc(&wqe->wq->worker_refs);
-			create_io_worker(wqe->wq, wqe, acct->index, first);
-		}
-	}
+	raw_spin_lock(&wqe->lock);
+	if (acct->nr_workers < acct->max_workers) {
+		acct->nr_workers++;
+		do_create = true;
+	}
+	raw_spin_unlock(&wqe->lock);
+	if (do_create) {
+		atomic_inc(&acct->nr_running);
+		atomic_inc(&wqe->wq->worker_refs);
+		return create_io_worker(wqe->wq, wqe, acct->index);
+	}
+
+	return true;
 }

 static void io_wqe_inc_running(struct io_worker *worker)

@@ -283,7 +283,7 @@ static void create_worker_cb(struct callback_head *cb)
 	struct io_wq *wq;
 	struct io_wqe *wqe;
 	struct io_wqe_acct *acct;
-	bool do_create = false, first = false;
+	bool do_create = false;

 	worker = container_of(cb, struct io_worker, create_work);
 	wqe = worker->wqe;

@@ -291,14 +291,12 @@ static void create_worker_cb(struct callback_head *cb)
 	acct = &wqe->acct[worker->create_index];
 	raw_spin_lock(&wqe->lock);
 	if (acct->nr_workers < acct->max_workers) {
-		if (!acct->nr_workers)
-			first = true;
 		acct->nr_workers++;
 		do_create = true;
 	}
 	raw_spin_unlock(&wqe->lock);
 	if (do_create) {
-		create_io_worker(wq, wqe, worker->create_index, first);
+		create_io_worker(wq, wqe, worker->create_index);
 	} else {
 		atomic_dec(&acct->nr_running);
 		io_worker_ref_put(wq);

@@ -307,9 +305,11 @@ static void create_worker_cb(struct callback_head *cb)
 	io_worker_release(worker);
 }

-static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
-				   struct io_wqe_acct *acct)
+static bool io_queue_worker_create(struct io_worker *worker,
+				   struct io_wqe_acct *acct,
+				   task_work_func_t func)
 {
+	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;

 	/* raced with exit, just ignore create call */

@@ -327,16 +327,17 @@ static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
 	    test_and_set_bit_lock(0, &worker->create_state))
 		goto fail_release;

-	init_task_work(&worker->create_work, create_worker_cb);
+	init_task_work(&worker->create_work, func);
 	worker->create_index = acct->index;
 	if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
-		return;
+		return true;
 	clear_bit_unlock(0, &worker->create_state);
fail_release:
 	io_worker_release(worker);
fail:
 	atomic_dec(&acct->nr_running);
 	io_worker_ref_put(wq);
+	return false;
 }

 static void io_wqe_dec_running(struct io_worker *worker)

@@ -348,10 +349,10 @@ static void io_wqe_dec_running(struct io_worker *worker)
 	if (!(worker->flags & IO_WORKER_F_UP))
 		return;

-	if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
+	if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
 		atomic_inc(&acct->nr_running);
 		atomic_inc(&wqe->wq->worker_refs);
-		io_queue_worker_create(wqe, worker, acct);
+		io_queue_worker_create(worker, acct, create_worker_cb);
 	}
 }

@@ -363,29 +364,10 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
 			     struct io_wq_work *work)
 	__must_hold(wqe->lock)
 {
-	bool worker_bound, work_bound;
-
-	BUILD_BUG_ON((IO_WQ_ACCT_UNBOUND ^ IO_WQ_ACCT_BOUND) != 1);
-
 	if (worker->flags & IO_WORKER_F_FREE) {
 		worker->flags &= ~IO_WORKER_F_FREE;
 		hlist_nulls_del_init_rcu(&worker->nulls_node);
 	}
-
-	/*
-	 * If worker is moving from bound to unbound (or vice versa), then
-	 * ensure we update the running accounting.
-	 */
-	worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
-	work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
-	if (worker_bound != work_bound) {
-		int index = work_bound ? IO_WQ_ACCT_UNBOUND : IO_WQ_ACCT_BOUND;
-		io_wqe_dec_running(worker);
-		worker->flags ^= IO_WORKER_F_BOUND;
-		wqe->acct[index].nr_workers--;
-		wqe->acct[index ^ 1].nr_workers++;
-		io_wqe_inc_running(worker);
-	}
 }

@@ -413,7 +395,7 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
 {
 	struct io_wq *wq = wqe->wq;

-	spin_lock(&wq->hash->wait.lock);
+	spin_lock_irq(&wq->hash->wait.lock);
 	if (list_empty(&wqe->wait.entry)) {
 		__add_wait_queue(&wq->hash->wait, &wqe->wait);
 		if (!test_bit(hash, &wq->hash->map)) {

@@ -421,48 +403,26 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
 			list_del_init(&wqe->wait.entry);
 		}
 	}
-	spin_unlock(&wq->hash->wait.lock);
+	spin_unlock_irq(&wq->hash->wait.lock);
 }

-/*
- * We can always run the work if the worker is currently the same type as
- * the work (eg both are bound, or both are unbound). If they are not the
- * same, only allow it if incrementing the worker count would be allowed.
- */
-static bool io_worker_can_run_work(struct io_worker *worker,
-				   struct io_wq_work *work)
-{
-	struct io_wqe_acct *acct;
-
-	if (!(worker->flags & IO_WORKER_F_BOUND) !=
-	    !(work->flags & IO_WQ_WORK_UNBOUND))
-		return true;
-
-	/* not the same type, check if we'd go over the limit */
-	acct = io_work_get_acct(worker->wqe, work);
-	return acct->nr_workers < acct->max_workers;
-}
-
-static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
-					   struct io_worker *worker,
-					   bool *stalled)
+static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
+					   struct io_worker *worker)
 	__must_hold(wqe->lock)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work, *tail;
 	unsigned int stall_hash = -1U;
+	struct io_wqe *wqe = worker->wqe;

-	wq_list_for_each(node, prev, &wqe->work_list) {
+	wq_list_for_each(node, prev, &acct->work_list) {
 		unsigned int hash;

 		work = container_of(node, struct io_wq_work, list);

-		if (!io_worker_can_run_work(worker, work))
-			break;
-
 		/* not hashed, can run anytime */
 		if (!io_wq_is_hashed(work)) {
-			wq_list_del(&wqe->work_list, node, prev);
+			wq_list_del(&acct->work_list, node, prev);
 			return work;
 		}

@@ -473,7 +433,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
 		/* hashed, can run if not already running */
 		if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
 			wqe->hash_tail[hash] = NULL;
-			wq_list_cut(&wqe->work_list, &tail->list, prev);
+			wq_list_cut(&acct->work_list, &tail->list, prev);
 			return work;
 		}
 		if (stall_hash == -1U)

@@ -483,10 +443,14 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
 	if (stall_hash != -1U) {
+		/*
+		 * Set this before dropping the lock to avoid racing with new
+		 * work being added and clearing the stalled bit.
+		 */
+		set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
 		raw_spin_unlock(&wqe->lock);
 		io_wait_on_hash(wqe, stall_hash);
 		raw_spin_lock(&wqe->lock);
-		*stalled = true;
 	}

 	return NULL;

@@ -520,13 +484,13 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
 static void io_worker_handle_work(struct io_worker *worker)
 	__releases(wqe->lock)
 {
+	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
 	bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);

 	do {
 		struct io_wq_work *work;
-		bool stalled;
get_next:

@@ -535,12 +499,9 @@ static void io_worker_handle_work(struct io_worker *worker)
 		 * can't make progress, any work completion or insertion will
 		 * clear the stalled flag.
 		 */
-		stalled = false;
-		work = io_get_next_work(wqe, worker, &stalled);
+		work = io_get_next_work(acct, worker);
 		if (work)
 			__io_worker_busy(wqe, worker, work);
-		else if (stalled)
-			wqe->flags |= IO_WQE_FLAG_STALLED;

 		raw_spin_unlock(&wqe->lock);
 		if (!work)

@@ -572,10 +533,10 @@ static void io_worker_handle_work(struct io_worker *worker)
 			if (hash != -1U && !next_hashed) {
 				clear_bit(hash, &wq->hash->map);
+				clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
 				if (wq_has_sleeper(&wq->hash->wait))
 					wake_up(&wq->hash->wait);
 				raw_spin_lock(&wqe->lock);
-				wqe->flags &= ~IO_WQE_FLAG_STALLED;
 				/* skip unnecessary unlock-lock wqe->lock */
 				if (!work)
 					goto get_next;

@@ -590,8 +551,10 @@ static void io_worker_handle_work(struct io_worker *worker)
 static int io_wqe_worker(void *data)
 {
 	struct io_worker *worker = data;
+	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
+	bool last_timeout = false;
 	char buf[TASK_COMM_LEN];

 	worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);

@@ -605,10 +568,17 @@ static int io_wqe_worker(void *data)
 		set_current_state(TASK_INTERRUPTIBLE);
loop:
 		raw_spin_lock(&wqe->lock);
-		if (io_wqe_run_queue(wqe)) {
+		if (io_acct_run_queue(acct)) {
 			io_worker_handle_work(worker);
 			goto loop;
 		}
+		/* timed out, exit unless we're the last worker */
+		if (last_timeout && acct->nr_workers > 1) {
+			raw_spin_unlock(&wqe->lock);
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
+		last_timeout = false;
 		__io_worker_idle(wqe, worker);
 		raw_spin_unlock(&wqe->lock);
 		if (io_flush_signals())

@@ -619,13 +589,11 @@ static int io_wqe_worker(void *data)
 			if (!get_signal(&ksig))
 				continue;
-			break;
+			if (fatal_signal_pending(current))
+				break;
+			continue;
 		}
-		if (ret)
-			continue;
-		/* timed out, exit unless we're the fixed worker */
-		if (!(worker->flags & IO_WORKER_F_FIXED))
-			break;
+		last_timeout = !ret;
 	}

 	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {

@@ -676,36 +644,9 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
 	raw_spin_unlock(&worker->wqe->lock);
 }

-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first)
+static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
+			       struct task_struct *tsk)
 {
-	struct io_wqe_acct *acct = &wqe->acct[index];
-	struct io_worker *worker;
-	struct task_struct *tsk;
-
-	__set_current_state(TASK_RUNNING);
-
-	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
-	if (!worker)
-		goto fail;
-
-	refcount_set(&worker->ref, 1);
-	worker->nulls_node.pprev = NULL;
-	worker->wqe = wqe;
-	spin_lock_init(&worker->lock);
-	init_completion(&worker->ref_done);
-
-	tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
-	if (IS_ERR(tsk)) {
-		kfree(worker);
-fail:
-		atomic_dec(&acct->nr_running);
-		raw_spin_lock(&wqe->lock);
-		acct->nr_workers--;
-		raw_spin_unlock(&wqe->lock);
-		io_worker_ref_put(wq);
-		return;
-	}
-
 	tsk->pf_io_worker = worker;
 	worker->task = tsk;
 	set_cpus_allowed_ptr(tsk, wqe->cpu_mask);

@@ -715,14 +656,118 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bo
 	hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
 	list_add_tail_rcu(&worker->all_list, &wqe->all_list);
 	worker->flags |= IO_WORKER_F_FREE;
-	if (index == IO_WQ_ACCT_BOUND)
-		worker->flags |= IO_WORKER_F_BOUND;
-	if (first && (worker->flags & IO_WORKER_F_BOUND))
-		worker->flags |= IO_WORKER_F_FIXED;
 	raw_spin_unlock(&wqe->lock);
 	wake_up_new_task(tsk);
 }

+static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
+{
+	return true;
+}
+
+static inline bool io_should_retry_thread(long err)
+{
+	switch (err) {
+	case -EAGAIN:
+	case -ERESTARTSYS:
+	case -ERESTARTNOINTR:
+	case -ERESTARTNOHAND:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void create_worker_cont(struct callback_head *cb)
+{
+	struct io_worker *worker;
+	struct task_struct *tsk;
+	struct io_wqe *wqe;
+
+	worker = container_of(cb, struct io_worker, create_work);
+	clear_bit_unlock(0, &worker->create_state);
+	wqe = worker->wqe;
+	tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+	if (!IS_ERR(tsk)) {
+		io_init_new_worker(wqe, worker, tsk);
+		io_worker_release(worker);
+		return;
+	} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+		struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+
+		atomic_dec(&acct->nr_running);
+		raw_spin_lock(&wqe->lock);
+		acct->nr_workers--;
+		if (!acct->nr_workers) {
+			struct io_cb_cancel_data match = {
+				.fn		= io_wq_work_match_all,
+				.cancel_all	= true,
+			};
+
+			while (io_acct_cancel_pending_work(wqe, acct, &match))
+				raw_spin_lock(&wqe->lock);
+		}
+		raw_spin_unlock(&wqe->lock);
+		io_worker_ref_put(wqe->wq);
+		return;
+	}
+
+	/* re-create attempts grab a new worker ref, drop the existing one */
+	io_worker_release(worker);
+	schedule_work(&worker->work);
+}
+
+static void io_workqueue_create(struct work_struct *work)
+{
+	struct io_worker *worker = container_of(work, struct io_worker, work);
+	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+
+	if (!io_queue_worker_create(worker, acct, create_worker_cont)) {
+		clear_bit_unlock(0, &worker->create_state);
+		io_worker_release(worker);
+	}
+}
+
+static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+{
+	struct io_wqe_acct *acct = &wqe->acct[index];
+	struct io_worker *worker;
+	struct task_struct *tsk;
+
+	__set_current_state(TASK_RUNNING);
+
+	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
+	if (!worker) {
+fail:
+		atomic_dec(&acct->nr_running);
+		raw_spin_lock(&wqe->lock);
+		acct->nr_workers--;
+		raw_spin_unlock(&wqe->lock);
+		io_worker_ref_put(wq);
+		return false;
+	}
+
+	refcount_set(&worker->ref, 1);
+	worker->wqe = wqe;
+	spin_lock_init(&worker->lock);
+	init_completion(&worker->ref_done);
+
+	if (index == IO_WQ_ACCT_BOUND)
+		worker->flags |= IO_WORKER_F_BOUND;
+
+	tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+	if (!IS_ERR(tsk)) {
+		io_init_new_worker(wqe, worker, tsk);
+	} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+		goto fail;
+	} else {
+		INIT_WORK(&worker->work, io_workqueue_create);
+		schedule_work(&worker->work);
+	}
+
+	return true;
+}
+
 /*
  * Iterate the passed in list and call the specific function for each
  * worker that isn't exiting

@@ -755,11 +800,6 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 	return false;
 }

-static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
-{
-	return true;
-}
-
 static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
 {
 	struct io_wq *wq = wqe->wq;

@@ -773,12 +813,13 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
 static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
 {
+	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
 	unsigned int hash;
 	struct io_wq_work *tail;

 	if (!io_wq_is_hashed(work)) {
append:
-		wq_list_add_tail(&work->list, &wqe->work_list);
+		wq_list_add_tail(&work->list, &acct->work_list);
 		return;
 	}

@@ -788,13 +829,14 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
 	if (!tail)
 		goto append;

-	wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
+	wq_list_add_after(&work->list, &tail->list, &acct->work_list);
 }

 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 {
 	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
-	bool do_wake;
+	unsigned work_flags = work->flags;
+	bool do_create;

 	/*
 	 * If io-wq is exiting for this task, or if the request has explicitly

@@ -802,19 +844,36 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 	 */
 	if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
 	    (work->flags & IO_WQ_WORK_CANCEL)) {
+run_cancel:
 		io_run_cancel(work, wqe);
 		return;
 	}

 	raw_spin_lock(&wqe->lock);
 	io_wqe_insert_work(wqe, work);
-	wqe->flags &= ~IO_WQE_FLAG_STALLED;
-	do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
-		  !atomic_read(&acct->nr_running);
+	clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+
+	rcu_read_lock();
+	do_create = !io_wqe_activate_free_worker(wqe, acct);
+	rcu_read_unlock();
+
 	raw_spin_unlock(&wqe->lock);

-	if (do_wake)
-		io_wqe_wake_worker(wqe, acct);
+	if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) ||
+	    !atomic_read(&acct->nr_running))) {
+		bool did_create;
+
+		did_create = io_wqe_create_worker(wqe, acct);
+		if (unlikely(!did_create)) {
+			raw_spin_lock(&wqe->lock);
+			/* fatal condition, failed to create the first worker */
+			if (!acct->nr_workers) {
+				raw_spin_unlock(&wqe->lock);
+				goto run_cancel;
+			}
+			raw_spin_unlock(&wqe->lock);
+		}
+	}
 }

 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)

@@ -859,6 +918,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
 					 struct io_wq_work *work,
 					 struct io_wq_work_node *prev)
 {
+	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
 	unsigned int hash = io_get_work_hash(work);
 	struct io_wq_work *prev_work = NULL;

@@ -870,18 +930,18 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
 		else
 			wqe->hash_tail[hash] = NULL;
 	}
-	wq_list_del(&wqe->work_list, &work->list, prev);
+	wq_list_del(&acct->work_list, &work->list, prev);
 }

-static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
-				       struct io_cb_cancel_data *match)
+static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
+					struct io_wqe_acct *acct,
+					struct io_cb_cancel_data *match)
+	__releases(wqe->lock)
 {
 	struct io_wq_work_node *node, *prev;
 	struct io_wq_work *work;

-retry:
-	raw_spin_lock(&wqe->lock);
-	wq_list_for_each(node, prev, &wqe->work_list) {
+	wq_list_for_each(node, prev, &acct->work_list) {
 		work = container_of(node, struct io_wq_work, list);
 		if (!match->fn(work, match->data))
 			continue;

@@ -889,11 +949,27 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
 		raw_spin_unlock(&wqe->lock);
 		io_run_cancel(work, wqe);
 		match->nr_pending++;
-		if (!match->cancel_all)
-			return;
-
 		/* not safe to continue after unlock */
-		goto retry;
+		return true;
 	}
+
+	return false;
+}
+
+static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+				       struct io_cb_cancel_data *match)
+{
+	int i;
+retry:
+	raw_spin_lock(&wqe->lock);
+	for (i = 0; i < IO_WQ_ACCT_NR; i++) {
+		struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
+
+		if (io_acct_cancel_pending_work(wqe, acct, match)) {
+			if (match->cancel_all)
+				goto retry;
+			return;
+		}
+	}
+	raw_spin_unlock(&wqe->lock);
 }

@@ -954,18 +1030,24 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
 			    int sync, void *key)
 {
 	struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
+	int i;

 	list_del_init(&wait->entry);

 	rcu_read_lock();
-	io_wqe_activate_free_worker(wqe);
+	for (i = 0; i < IO_WQ_ACCT_NR; i++) {
+		struct io_wqe_acct *acct = &wqe->acct[i];
+
+		if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
+			io_wqe_activate_free_worker(wqe, acct);
+	}
 	rcu_read_unlock();
 	return 1;
 }

 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 {
-	int ret, node;
+	int ret, node, i;
 	struct io_wq *wq;

 	if (WARN_ON_ONCE(!data->free_work || !data->do_work))

@@ -1000,18 +1082,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 		cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
 		wq->wqes[node] = wqe;
 		wqe->node = alloc_node;
-		wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
-		wqe->acct[IO_WQ_ACCT_UNBOUND].index = IO_WQ_ACCT_UNBOUND;
 		wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
-		atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
 		wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
 					task_rlimit(current, RLIMIT_NPROC);
-		atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
-		wqe->wait.func = io_wqe_hash_wake;
 		INIT_LIST_HEAD(&wqe->wait.entry);
+		wqe->wait.func = io_wqe_hash_wake;
+		for (i = 0; i < IO_WQ_ACCT_NR; i++) {
+			struct io_wqe_acct *acct = &wqe->acct[i];
+
+			acct->index = i;
+			atomic_set(&acct->nr_running, 0);
+			INIT_WQ_LIST(&acct->work_list);
+		}
 		wqe->wq = wq;
 		raw_spin_lock_init(&wqe->lock);
-		INIT_WQ_LIST(&wqe->work_list);
 		INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
 		INIT_LIST_HEAD(&wqe->all_list);
 	}

@@ -1038,7 +1122,7 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
 {
 	struct io_worker *worker;

-	if (cb->func != create_worker_cb)
+	if (cb->func != create_worker_cb || cb->func != create_worker_cont)
 		return false;
 	worker = container_of(cb, struct io_worker, create_work);
 	return worker->wqe->wq == data;

@@ -1193,7 +1277,7 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
 	for_each_node(node) {
 		struct io_wqe_acct *acct;

-		for (i = 0; i < 2; i++) {
+		for (i = 0; i < IO_WQ_ACCT_NR; i++) {
 			acct = &wq->wqes[node]->acct[i];
 			prev = max_t(int, acct->max_workers, prev);
 			if (new_count[i])
fs/io_uring.c

@@ -1021,6 +1021,7 @@ static const struct io_op_def io_op_defs[] = {
 	},
 	[IORING_OP_WRITE] = {
 		.needs_file		= 1,
+		.hash_reg_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.pollout		= 1,
 		.plug			= 1,

@@ -1851,6 +1852,17 @@ static void io_req_complete_failed(struct io_kiocb *req, long res)
 	io_req_complete_post(req, res, 0);
 }

+static void io_req_complete_fail_submit(struct io_kiocb *req)
+{
+	/*
+	 * We don't submit, fail them all, for that replace hardlinks with
+	 * normal links. Extra REQ_F_LINK is tolerated.
+	 */
+	req->flags &= ~REQ_F_HARDLINK;
+	req->flags |= REQ_F_LINK;
+	io_req_complete_failed(req, req->result);
+}
+
 /*
  * Don't initialise the fields below on every allocation, but do that in
  * advance and keep them valid across allocations.

@@ -2119,6 +2131,9 @@ static void tctx_task_work(struct callback_head *cb)
 	while (1) {
 		struct io_wq_work_node *node;

+		if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
+			io_submit_flush_completions(ctx);
+
 		spin_lock_irq(&tctx->task_lock);
 		node = tctx->task_list.first;
 		INIT_WQ_LIST(&tctx->task_list);

@@ -2673,7 +2688,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
 {
 	if (__io_complete_rw_common(req, res))
 		return;
-	__io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
+	__io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
 }

 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)

@@ -3410,6 +3425,12 @@ static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
 		return -EINVAL;
 }

+static bool need_read_all(struct io_kiocb *req)
+{
+	return req->flags & REQ_F_ISREG ||
+		S_ISBLK(file_inode(req->file)->i_mode);
+}
+
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;

@@ -3464,7 +3485,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	} else if (ret == -EIOCBQUEUED) {
 		goto out_free;
 	} else if (ret <= 0 || ret == io_size || !force_nonblock ||
-		   (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
+		   (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
 		/* read all, failed, already did sync or don't want to retry */
 		goto done;
 	}

@@ -5249,7 +5270,7 @@ static void io_poll_remove_double(struct io_kiocb *req)
-static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
 	__must_hold(&req->ctx->completion_lock)
 {
 	struct io_ring_ctx *ctx = req->ctx;

@@ -5271,10 +5292,19 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
 	if (flags & IORING_CQE_F_MORE)
 		ctx->cq_extra++;

-	io_commit_cqring(ctx);
 	return !(flags & IORING_CQE_F_MORE);
 }

+static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+	__must_hold(&req->ctx->completion_lock)
+{
+	bool done;
+
+	done = __io_poll_complete(req, mask);
+	io_commit_cqring(req->ctx);
+	return done;
+}
+
 static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 {
 	struct io_ring_ctx *ctx = req->ctx;

@@ -5285,7 +5315,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 	} else {
 		bool done;

-		done = io_poll_complete(req, req->result);
+		done = __io_poll_complete(req, req->result);
 		if (done) {
 			io_poll_remove_double(req);
 			hash_del(&req->hash_node);

@@ -5293,6 +5323,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 			req->result = 0;
 			add_wait_queue(req->poll.head, &req->poll.wait);
 		}
+		io_commit_cqring(ctx);
 		spin_unlock(&ctx->completion_lock);
 		io_cqring_ev_posted(ctx);

@@ -6398,6 +6429,11 @@ static bool io_drain_req(struct io_kiocb *req)
 	int ret;
 	u32 seq;

+	if (req->flags & REQ_F_FAIL) {
+		io_req_complete_fail_submit(req);
+		return true;
+	}
+
 	/*
 	 * If we need to drain a request in the middle of a link, drain the
 	 * head request and the next request/link after the current link.

@@ -6914,7 +6950,7 @@ static inline void io_queue_sqe(struct io_kiocb *req)
 	if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
 		__io_queue_sqe(req);
 	} else if (req->flags & REQ_F_FAIL) {
-		io_req_complete_failed(req, req->result);
+		io_req_complete_fail_submit(req);
 	} else {
 		int ret = io_req_prep_async(req);

@@ -10498,26 +10534,46 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
 static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 					void __user *arg)
 {
-	struct io_uring_task *tctx = current->io_uring;
+	struct io_uring_task *tctx = NULL;
+	struct io_sq_data *sqd = NULL;
 	__u32 new_count[2];
 	int i, ret;

-	if (!tctx || !tctx->io_wq)
-		return -EINVAL;
 	if (copy_from_user(new_count, arg, sizeof(new_count)))
 		return -EFAULT;
 	for (i = 0; i < ARRAY_SIZE(new_count); i++)
 		if (new_count[i] > INT_MAX)
 			return -EINVAL;

+	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		sqd = ctx->sq_data;
+		if (sqd) {
+			mutex_lock(&sqd->lock);
+			tctx = sqd->thread->io_uring;
+		}
+	} else {
+		tctx = current->io_uring;
+	}
+
+	ret = -EINVAL;
+	if (!tctx || !tctx->io_wq)
+		goto err;
+
 	ret = io_wq_max_workers(tctx->io_wq, new_count);
 	if (ret)
-		return ret;
+		goto err;
+
+	if (sqd)
+		mutex_unlock(&sqd->lock);

 	if (copy_to_user(arg, new_count, sizeof(new_count)))
 		return -EFAULT;

 	return 0;
+err:
+	if (sqd)
+		mutex_unlock(&sqd->lock);
+	return ret;
 }

 static bool io_register_op_must_quiesce(int op)
include/trace/events/io_uring.h

@@ -295,14 +295,14 @@ TRACE_EVENT(io_uring_fail_link,
  */
 TRACE_EVENT(io_uring_complete,

-	TP_PROTO(void *ctx, u64 user_data, long res, unsigned cflags),
+	TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),

 	TP_ARGS(ctx, user_data, res, cflags),

 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
 		__field(  u64,		user_data	)
-		__field(  long,		res		)
+		__field(  int,		res		)
 		__field(  unsigned,	cflags		)
 	),

@@ -313,7 +313,7 @@ TRACE_EVENT(io_uring_complete,
 		__entry->cflags = cflags;
 	),

-	TP_printk("ring %p, user_data 0x%llx, result %ld, cflags %x",
+	TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
 			  __entry->ctx, (unsigned long long)__entry->user_data,
 			  __entry->res, __entry->cflags)
 );
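The trace change above only narrows the io_uring_complete result field from long to int; the event name and the rest of its format are unchanged. The following is not part of the commit: a small C sketch for watching the event, assuming root privileges and tracefs mounted at /sys/kernel/tracing (older setups use /sys/kernel/debug/tracing).

/*
 * Hedged sketch: enable the io_uring_complete tracepoint and tail the
 * trace pipe so the "result %d" output from the TP_printk above can be
 * observed while an io_uring workload runs.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *enable =
		"/sys/kernel/tracing/events/io_uring/io_uring_complete/enable";
	int fd = open(enable, O_WRONLY);

	if (fd < 0 || write(fd, "1", 1) != 1) {
		perror("enable io_uring_complete tracepoint");
		return 1;
	}
	close(fd);

	FILE *pipe = fopen("/sys/kernel/tracing/trace_pipe", "r");
	char line[512];

	if (!pipe) {
		perror("trace_pipe");
		return 1;
	}
	/* each event line ends with "... result <int>, cflags <hex>" */
	while (fgets(line, sizeof(line), pipe))
		fputs(line, stdout);
	fclose(pipe);
	return 0;
}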