io_uring-20190323
-----BEGIN PGP SIGNATURE-----
iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAlyWVysQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpn5lD/0bEg76kbuwOUy5+FDqOpF0MNOU7xZcYcsI
YkkaKkUi2YQL6NJlkU7AhtPwep+J2sgSnDW9Ho9WIXbsnsO6UF79uIdcix6zJGIl
WnZZ3BLgWeciCfrzFpn3FFZnm/AKJSPWPmllUFvmUYT9GdRgN4ZnHBsS1HTlJ1m5
5HhwLtaYOsZ75NxWBRqWspmtFe+XZ/CrjGgmvIF8FjSuIP2q0RrOmCF1XAA82umd
ehiU1ZtQ+v4FHxmJWjzMWhrCj2y0gmPb+DotIqefFjVnd/G+LrFGMD1fsLoQVFDy
L5VzSOGj1E4KXfDpIeGnz/08dpqXmOkvsSaNnv1U7vA7SCkbodR/BA1EKJrvk5v7
MGkkcQDaU/WzC41RCyVQNWAWjzNLKbruXQ+1HqCx5eh7uthvMQMXDvGf4Jgeq+/E
vGzrEKZ6qI78Vy0mXSy4dfFbFaNTjCkE2jbIG7BQx5zdtnS9/VPXNkpZxPrGLM+P
/fTsLXghU9lKn6WHVtLpQsfJr0OMjyC9JA23pTX2G9MtBhDcyuRs+uCeQgG6cIkl
F15LGuOY7YGYxRsegdinFaoldnHersUDx19c+uFdrB0k0A/A6KeGHuZx7aJPkW1L
M89FkyJr2ZBgc26PvKz6j1Hwl2MKJC5h8TpPES/QnulWh4FbqqH3a501Qa1AQuxC
1me95iy74w==
=l4lx
-----END PGP SIGNATURE-----

Merge tag 'io_uring-20190323' of git://git.kernel.dk/linux-block

Pull io_uring fixes and improvements from Jens Axboe:
 "The first five in this series are heavily inspired by the work Al did
  on the aio side to fix the races there.

  The last two re-introduce a feature that was in io_uring before it got
  merged, but which I pulled since we didn't have a good way to have
  BVEC iters that already have a stable reference. These aren't
  necessarily related to block, it's just how io_uring pins fixed
  buffers"

* tag 'io_uring-20190323' of git://git.kernel.dk/linux-block:
  block: add BIO_NO_PAGE_REF flag
  iov_iter: add ITER_BVEC_FLAG_NO_REF flag
  io_uring: mark me as the maintainer
  io_uring: retry bulk slab allocs as single allocs
  io_uring: fix poll races
  io_uring: fix fget/fput handling
  io_uring: add prepped flag
  io_uring: make io_read/write return an integer
  io_uring: use regular request ref counts
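
The last two patches work as follows: a submitter that already holds long-term
references to its pages (io_uring's registered fixed buffers) tags its BVEC
iterator with ITER_BVEC_FLAG_NO_REF, and bio_iov_iter_get_pages() then marks
the bio BIO_NO_PAGE_REF so no per-page references are taken or dropped. A
minimal sketch of the submitter side follows; it assumes pages that are already
pinned, the function name is made up, and it is illustrative only, not code
from this merge:

	static int example_submit_pinned_bvec(struct bio *bio, struct bio_vec *bvec,
					      unsigned int nr_bvecs, size_t len)
	{
		struct iov_iter iter;
		int ret;

		/* 'bvec' describes pages the caller already holds references to */
		iov_iter_bvec(&iter, WRITE, bvec, nr_bvecs, len);
		/* tell the block layer not to take/drop its own page references */
		iter.type |= ITER_BVEC_FLAG_NO_REF;

		/* bio_iov_iter_get_pages() will flag the bio BIO_NO_PAGE_REF */
		ret = bio_iov_iter_get_pages(bio, &iter);
		if (ret)
			return ret;

		submit_bio(bio);
		return 0;
	}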
commit 1bdd3dbfff

 MAINTAINERS | 10
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8096,6 +8096,16 @@ F:	include/linux/iommu.h
 F:	include/linux/of_iommu.h
 F:	include/linux/iova.h
 
+IO_URING
+M:	Jens Axboe <axboe@kernel.dk>
+L:	linux-block@vger.kernel.org
+L:	linux-fsdevel@vger.kernel.org
+T:	git git://git.kernel.dk/linux-block
+T:	git git://git.kernel.dk/liburing
+S:	Maintained
+F:	fs/io_uring.c
+F:	include/uapi/linux/io_uring.h
+
 IP MASQUERADING
 M:	Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
 S:	Maintained

 block/bio.c | 43
--- a/block/bio.c
+++ b/block/bio.c
@@ -849,20 +849,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
 	size = bio_add_page(bio, bv->bv_page, len,
 				bv->bv_offset + iter->iov_offset);
 	if (size == len) {
-		struct page *page;
-		int i;
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+			struct page *page;
+			int i;
 
-		/*
-		 * For the normal O_DIRECT case, we could skip grabbing this
-		 * reference and then not have to put them again when IO
-		 * completes. But this breaks some in-kernel users, like
-		 * splicing to/from a loop device, where we release the pipe
-		 * pages unconditionally. If we can fix that case, we can
-		 * get rid of the get here and the need to call
-		 * bio_release_pages() at IO completion time.
-		 */
-		mp_bvec_for_each_page(page, bv, i)
-			get_page(page);
+			mp_bvec_for_each_page(page, bv, i)
+				get_page(page);
+		}
 		iov_iter_advance(iter, size);
 		return 0;
 	}
@@ -925,10 +919,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
  * This takes either an iterator pointing to user memory, or one pointing to
  * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
  * map them into the kernel. On IO completion, the caller should put those
- * pages. For now, when adding kernel pages, we still grab a reference to the
- * page. This isn't strictly needed for the common case, but some call paths
- * end up releasing pages from eg a pipe and we can't easily control these.
- * See comment in __bio_iov_bvec_add_pages().
+ * pages. If we're adding kernel pages, and the caller told us it's safe to
+ * do so, we just have to add the pages to the bio directly. We don't grab an
+ * extra reference to those pages (the user should already have that), and we
+ * don't put the page on IO completion. The caller needs to check if the bio is
+ * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
+ * released.
  *
  * The function tries, but does not guarantee, to pin as many pages as
  * fit into the bio, or are requested in *iter, whatever is smaller. If
@@ -940,6 +936,13 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	const bool is_bvec = iov_iter_is_bvec(iter);
 	unsigned short orig_vcnt = bio->bi_vcnt;
 
+	/*
+	 * If this is a BVEC iter, then the pages are kernel pages. Don't
+	 * release them on IO completion, if the caller asked us to.
+	 */
+	if (is_bvec && iov_iter_bvec_no_ref(iter))
+		bio_set_flag(bio, BIO_NO_PAGE_REF);
+
 	do {
 		int ret;
 
@@ -1696,7 +1699,8 @@ static void bio_dirty_fn(struct work_struct *work)
 		next = bio->bi_private;
 
 		bio_set_pages_dirty(bio);
-		bio_release_pages(bio);
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+			bio_release_pages(bio);
 		bio_put(bio);
 	}
 }
@@ -1713,7 +1717,8 @@ void bio_check_pages_dirty(struct bio *bio)
 		goto defer;
 	}
 
-	bio_release_pages(bio);
+	if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+		bio_release_pages(bio);
 	bio_put(bio);
 	return;
 defer:

--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -336,12 +336,14 @@ static void blkdev_bio_end_io(struct bio *bio)
 		if (should_dirty) {
 			bio_check_pages_dirty(bio);
 		} else {
-			struct bio_vec *bvec;
-			int i;
-			struct bvec_iter_all iter_all;
+			if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+				struct bvec_iter_all iter_all;
+				struct bio_vec *bvec;
+				int i;
 
-			bio_for_each_segment_all(bvec, bio, i, iter_all)
-				put_page(bvec->bv_page);
+				bio_for_each_segment_all(bvec, bio, i, iter_all)
+					put_page(bvec->bv_page);
+			}
 			bio_put(bio);
 		}
 	}
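
Taken together, the block/bio.c and fs/block_dev.c hunks above (and the
fs/iomap.c hunk further down) establish one completion-side rule: only drop
page references when the bio is not flagged BIO_NO_PAGE_REF. A minimal sketch
of that pattern, with a made-up function name, illustrative only:

	static void example_end_io_release(struct bio *bio)
	{
		/* a bio flagged BIO_NO_PAGE_REF never pinned its pages */
		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
			struct bvec_iter_all iter_all;
			struct bio_vec *bvec;
			int i;

			bio_for_each_segment_all(bvec, bio, i, iter_all)
				put_page(bvec->bv_page);
		}
		bio_put(bio);
	}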

 fs/io_uring.c | 447
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -189,17 +189,28 @@ struct sqe_submit {
 	bool				needs_fixed_file;
 };
 
+/*
+ * First field must be the file pointer in all the
+ * iocb unions! See also 'struct kiocb' in <linux/fs.h>
+ */
 struct io_poll_iocb {
 	struct file			*file;
 	struct wait_queue_head		*head;
 	__poll_t			events;
-	bool				woken;
+	bool				done;
 	bool				canceled;
 	struct wait_queue_entry		wait;
 };
 
+/*
+ * NOTE! Each of the iocb union members has the file pointer
+ * as the first entry in their struct definition. So you can
+ * access the file pointer through any of the sub-structs,
+ * or directly as just 'ki_filp' in this struct.
+ */
 struct io_kiocb {
 	union {
+		struct file		*file;
 		struct kiocb		rw;
 		struct io_poll_iocb	poll;
 	};
@@ -214,6 +225,7 @@ struct io_kiocb {
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 #define REQ_F_FIXED_FILE	4	/* ctx owns file */
 #define REQ_F_SEQ_PREV		8	/* sequential with previous */
+#define REQ_F_PREPPED		16	/* prep already done */
 	u64			user_data;
 	u64			error;
 
@@ -355,20 +367,25 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
 	}
 }
 
-static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+{
+	if (waitqueue_active(&ctx->wait))
+		wake_up(&ctx->wait);
+	if (waitqueue_active(&ctx->sqo_wait))
+		wake_up(&ctx->sqo_wait);
+}
+
+static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
 				long res, unsigned ev_flags)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	io_cqring_fill_event(ctx, ki_user_data, res, ev_flags);
+	io_cqring_fill_event(ctx, user_data, res, ev_flags);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
-	if (waitqueue_active(&ctx->wait))
-		wake_up(&ctx->wait);
-	if (waitqueue_active(&ctx->sqo_wait))
-		wake_up(&ctx->sqo_wait);
+	io_cqring_ev_posted(ctx);
 }
 
 static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
@@ -382,13 +399,14 @@ static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
 static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 				   struct io_submit_state *state)
 {
+	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 	struct io_kiocb *req;
 
 	if (!percpu_ref_tryget(&ctx->refs))
 		return NULL;
 
 	if (!state) {
-		req = kmem_cache_alloc(req_cachep, __GFP_NOWARN);
+		req = kmem_cache_alloc(req_cachep, gfp);
 		if (unlikely(!req))
 			goto out;
 	} else if (!state->free_reqs) {
@@ -396,10 +414,18 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		int ret;
 
 		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
-		ret = kmem_cache_alloc_bulk(req_cachep, __GFP_NOWARN, sz,
-						state->reqs);
-		if (unlikely(ret <= 0))
-			goto out;
+		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
+
+		/*
+		 * Bulk alloc is all-or-nothing. If we fail to get a batch,
+		 * retry single alloc to be on the safe side.
+		 */
+		if (unlikely(ret <= 0)) {
+			state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
+			if (!state->reqs[0])
+				goto out;
+			ret = 1;
+		}
 		state->free_reqs = ret - 1;
 		state->cur_req = 1;
 		req = state->reqs[0];
@@ -411,7 +437,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 
 	req->ctx = ctx;
 	req->flags = 0;
-	refcount_set(&req->refs, 0);
+	/* one is dropped after submission, the other at completion */
+	refcount_set(&req->refs, 2);
 	return req;
 out:
 	io_ring_drop_ctx_refs(ctx, 1);
@@ -429,10 +456,16 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
 
 static void io_free_req(struct io_kiocb *req)
 {
-	if (!refcount_read(&req->refs) || refcount_dec_and_test(&req->refs)) {
-		io_ring_drop_ctx_refs(req->ctx, 1);
-		kmem_cache_free(req_cachep, req);
-	}
+	if (req->file && !(req->flags & REQ_F_FIXED_FILE))
+		fput(req->file);
+	io_ring_drop_ctx_refs(req->ctx, 1);
+	kmem_cache_free(req_cachep, req);
+}
+
+static void io_put_req(struct io_kiocb *req)
+{
+	if (refcount_dec_and_test(&req->refs))
+		io_free_req(req);
 }
 
 /*
@@ -442,44 +475,34 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			       struct list_head *done)
 {
 	void *reqs[IO_IOPOLL_BATCH];
-	int file_count, to_free;
-	struct file *file = NULL;
 	struct io_kiocb *req;
+	int to_free;
 
-	file_count = to_free = 0;
+	to_free = 0;
 	while (!list_empty(done)) {
 		req = list_first_entry(done, struct io_kiocb, list);
 		list_del(&req->list);
 
 		io_cqring_fill_event(ctx, req->user_data, req->error, 0);
-
-		reqs[to_free++] = req;
 		(*nr_events)++;
 
-		/*
-		 * Batched puts of the same file, to avoid dirtying the
-		 * file usage count multiple times, if avoidable.
-		 */
-		if (!(req->flags & REQ_F_FIXED_FILE)) {
-			if (!file) {
-				file = req->rw.ki_filp;
-				file_count = 1;
-			} else if (file == req->rw.ki_filp) {
-				file_count++;
-			} else {
-				fput_many(file, file_count);
-				file = req->rw.ki_filp;
-				file_count = 1;
+		if (refcount_dec_and_test(&req->refs)) {
+			/* If we're not using fixed files, we have to pair the
+			 * completion part with the file put. Use regular
+			 * completions for those, only batch free for fixed
+			 * file.
+			 */
+			if (req->flags & REQ_F_FIXED_FILE) {
+				reqs[to_free++] = req;
+				if (to_free == ARRAY_SIZE(reqs))
+					io_free_req_many(ctx, reqs, &to_free);
+			} else {
+				io_free_req(req);
 			}
 		}
-
-		if (to_free == ARRAY_SIZE(reqs))
-			io_free_req_many(ctx, reqs, &to_free);
 	}
-	io_commit_cqring(ctx);
 
-	if (file)
-		fput_many(file, file_count);
+	io_commit_cqring(ctx);
 	io_free_req_many(ctx, reqs, &to_free);
 }
 
@@ -602,21 +625,14 @@ static void kiocb_end_write(struct kiocb *kiocb)
 	}
 }
 
-static void io_fput(struct io_kiocb *req)
-{
-	if (!(req->flags & REQ_F_FIXED_FILE))
-		fput(req->rw.ki_filp);
-}
-
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
 	kiocb_end_write(kiocb);
 
-	io_fput(req);
 	io_cqring_add_event(req->ctx, req->user_data, res, 0);
-	io_free_req(req);
+	io_put_req(req);
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -731,31 +747,18 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 	const struct io_uring_sqe *sqe = s->sqe;
 	struct io_ring_ctx *ctx = req->ctx;
 	struct kiocb *kiocb = &req->rw;
-	unsigned ioprio, flags;
-	int fd, ret;
+	unsigned ioprio;
+	int ret;
 
+	if (!req->file)
+		return -EBADF;
 	/* For -EAGAIN retry, everything is already prepped */
-	if (kiocb->ki_filp)
+	if (req->flags & REQ_F_PREPPED)
 		return 0;
 
-	flags = READ_ONCE(sqe->flags);
-	fd = READ_ONCE(sqe->fd);
+	if (force_nonblock && !io_file_supports_async(req->file))
+		force_nonblock = false;
 
-	if (flags & IOSQE_FIXED_FILE) {
-		if (unlikely(!ctx->user_files ||
-		    (unsigned) fd >= ctx->nr_user_files))
-			return -EBADF;
-		kiocb->ki_filp = ctx->user_files[fd];
-		req->flags |= REQ_F_FIXED_FILE;
-	} else {
-		if (s->needs_fixed_file)
-			return -EBADF;
-		kiocb->ki_filp = io_file_get(state, fd);
-		if (unlikely(!kiocb->ki_filp))
-			return -EBADF;
-		if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
-			force_nonblock = false;
-	}
 	kiocb->ki_pos = READ_ONCE(sqe->off);
 	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
 	kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
@@ -764,7 +767,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 	if (ioprio) {
 		ret = ioprio_check_cap(ioprio);
 		if (ret)
-			goto out_fput;
+			return ret;
 
 		kiocb->ki_ioprio = ioprio;
 	} else
@@ -772,38 +775,26 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 
 	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
 	if (unlikely(ret))
-		goto out_fput;
+		return ret;
 	if (force_nonblock) {
 		kiocb->ki_flags |= IOCB_NOWAIT;
 		req->flags |= REQ_F_FORCE_NONBLOCK;
 	}
 	if (ctx->flags & IORING_SETUP_IOPOLL) {
-		ret = -EOPNOTSUPP;
 		if (!(kiocb->ki_flags & IOCB_DIRECT) ||
 		    !kiocb->ki_filp->f_op->iopoll)
-			goto out_fput;
+			return -EOPNOTSUPP;
 
 		req->error = 0;
 		kiocb->ki_flags |= IOCB_HIPRI;
 		kiocb->ki_complete = io_complete_rw_iopoll;
 	} else {
-		if (kiocb->ki_flags & IOCB_HIPRI) {
-			ret = -EINVAL;
-			goto out_fput;
-		}
+		if (kiocb->ki_flags & IOCB_HIPRI)
+			return -EINVAL;
 		kiocb->ki_complete = io_complete_rw;
 	}
+	req->flags |= REQ_F_PREPPED;
 	return 0;
-out_fput:
-	if (!(flags & IOSQE_FIXED_FILE)) {
-		/*
-		 * in case of error, we didn't use this file reference. drop it.
-		 */
-		if (state)
-			state->used_refs--;
-		io_file_put(state, kiocb->ki_filp);
-	}
-	return ret;
 }
 
 static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
@@ -864,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
 	if (offset)
 		iov_iter_advance(iter, offset);
+
+	/* don't drop a reference to these pages */
+	iter->type |= ITER_BVEC_FLAG_NO_REF;
 	return 0;
 }
 
@@ -887,7 +881,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
 	opcode = READ_ONCE(sqe->opcode);
 	if (opcode == IORING_OP_READ_FIXED ||
 	    opcode == IORING_OP_WRITE_FIXED) {
-		ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
+		int ret = io_import_fixed(ctx, rw, sqe, iter);
 		*iovec = NULL;
 		return ret;
 	}
@@ -945,31 +939,29 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
 	async_list->io_end = io_end;
 }
 
-static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
+static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 		       bool force_nonblock, struct io_submit_state *state)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw;
 	struct iov_iter iter;
 	struct file *file;
 	size_t iov_count;
-	ssize_t ret;
+	int ret;
 
 	ret = io_prep_rw(req, s, force_nonblock, state);
 	if (ret)
 		return ret;
 	file = kiocb->ki_filp;
 
-	ret = -EBADF;
 	if (unlikely(!(file->f_mode & FMODE_READ)))
-		goto out_fput;
-	ret = -EINVAL;
+		return -EBADF;
 	if (unlikely(!file->f_op->read_iter))
-		goto out_fput;
+		return -EINVAL;
 
 	ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
 	if (ret)
-		goto out_fput;
+		return ret;
 
 	iov_count = iov_iter_count(&iter);
 	ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
@@ -991,38 +983,32 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
 		}
 	}
 	kfree(iovec);
-out_fput:
-	/* Hold on to the file for -EAGAIN */
-	if (unlikely(ret && ret != -EAGAIN))
-		io_fput(req);
 	return ret;
 }
 
-static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
+static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
 			bool force_nonblock, struct io_submit_state *state)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw;
 	struct iov_iter iter;
 	struct file *file;
 	size_t iov_count;
-	ssize_t ret;
+	int ret;
 
 	ret = io_prep_rw(req, s, force_nonblock, state);
 	if (ret)
 		return ret;
 
-	ret = -EBADF;
 	file = kiocb->ki_filp;
 	if (unlikely(!(file->f_mode & FMODE_WRITE)))
-		goto out_fput;
-	ret = -EINVAL;
+		return -EBADF;
 	if (unlikely(!file->f_op->write_iter))
-		goto out_fput;
+		return -EINVAL;
 
 	ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
 	if (ret)
-		goto out_fput;
+		return ret;
 
 	iov_count = iov_iter_count(&iter);
 
@@ -1054,10 +1040,6 @@ static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
 	}
 out_free:
 	kfree(iovec);
-out_fput:
-	/* Hold on to the file for -EAGAIN */
-	if (unlikely(ret && ret != -EAGAIN))
-		io_fput(req);
 	return ret;
 }
 
@@ -1072,29 +1054,19 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
-	/*
-	 * Twilight zone - it's possible that someone issued an opcode that
-	 * has a file attached, then got -EAGAIN on submission, and changed
-	 * the sqe before we retried it from async context. Avoid dropping
-	 * a file reference for this malicious case, and flag the error.
-	 */
-	if (req->rw.ki_filp) {
-		err = -EBADF;
-		io_fput(req);
-	}
 	io_cqring_add_event(ctx, user_data, err, 0);
-	io_free_req(req);
+	io_put_req(req);
 	return 0;
 }
 
 static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	unsigned flags;
-	int fd;
 
-	/* Prep already done */
-	if (req->rw.ki_filp)
+	if (!req->file)
+		return -EBADF;
+	/* Prep already done (EAGAIN retry) */
+	if (req->flags & REQ_F_PREPPED)
 		return 0;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
@@ -1102,20 +1074,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
-	fd = READ_ONCE(sqe->fd);
-	flags = READ_ONCE(sqe->flags);
-
-	if (flags & IOSQE_FIXED_FILE) {
-		if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
-			return -EBADF;
-		req->rw.ki_filp = ctx->user_files[fd];
-		req->flags |= REQ_F_FIXED_FILE;
-	} else {
-		req->rw.ki_filp = fget(fd);
-		if (unlikely(!req->rw.ki_filp))
-			return -EBADF;
-	}
-
+	req->flags |= REQ_F_PREPPED;
 	return 0;
 }
 
@@ -1144,9 +1103,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);
 
-	io_fput(req);
 	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
-	io_free_req(req);
+	io_put_req(req);
 	return 0;
 }
 
@@ -1204,15 +1162,16 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	spin_unlock_irq(&ctx->completion_lock);
 
 	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
-	io_free_req(req);
+	io_put_req(req);
 	return 0;
 }
 
-static void io_poll_complete(struct io_kiocb *req, __poll_t mask)
+static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
+			     __poll_t mask)
 {
-	io_cqring_add_event(req->ctx, req->user_data, mangle_poll(mask), 0);
-	io_fput(req);
-	io_free_req(req);
+	req->poll.done = true;
+	io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+	io_commit_cqring(ctx);
 }
 
 static void io_poll_complete_work(struct work_struct *work)
@@ -1240,9 +1199,11 @@ static void io_poll_complete_work(struct work_struct *work)
 		return;
 	}
 	list_del_init(&req->list);
+	io_poll_complete(ctx, req, mask);
 	spin_unlock_irq(&ctx->completion_lock);
 
-	io_poll_complete(req, mask);
+	io_cqring_ev_posted(ctx);
+	io_put_req(req);
 }
 
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -1253,29 +1214,25 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
 	struct io_ring_ctx *ctx = req->ctx;
 	__poll_t mask = key_to_poll(key);
+	unsigned long flags;
 
-	poll->woken = true;
-
 	/* for instances that support it check for an event match first: */
-	if (mask) {
-		unsigned long flags;
-
-		if (!(mask & poll->events))
-			return 0;
-
-		/* try to complete the iocb inline if we can: */
-		if (spin_trylock_irqsave(&ctx->completion_lock, flags)) {
-			list_del(&req->list);
-			spin_unlock_irqrestore(&ctx->completion_lock, flags);
-
-			list_del_init(&poll->wait.entry);
-			io_poll_complete(req, mask);
-			return 1;
-		}
-	}
+	if (mask && !(mask & poll->events))
+		return 0;
 
 	list_del_init(&poll->wait.entry);
-	queue_work(ctx->sqo_wq, &req->work);
+
+	if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+		list_del(&req->list);
+		io_poll_complete(ctx, req, mask);
+		spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+		io_cqring_ev_posted(ctx);
+		io_put_req(req);
+	} else {
+		queue_work(ctx->sqo_wq, &req->work);
+	}
 
 	return 1;
 }
@@ -1305,36 +1262,23 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_poll_iocb *poll = &req->poll;
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_poll_table ipt;
-	unsigned flags;
+	bool cancel = false;
 	__poll_t mask;
 	u16 events;
-	int fd;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
 		return -EINVAL;
+	if (!poll->file)
+		return -EBADF;
 
 	INIT_WORK(&req->work, io_poll_complete_work);
 	events = READ_ONCE(sqe->poll_events);
 	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
 
-	flags = READ_ONCE(sqe->flags);
-	fd = READ_ONCE(sqe->fd);
-
-	if (flags & IOSQE_FIXED_FILE) {
-		if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
-			return -EBADF;
-		poll->file = ctx->user_files[fd];
-		req->flags |= REQ_F_FIXED_FILE;
-	} else {
-		poll->file = fget(fd);
-	}
-	if (unlikely(!poll->file))
-		return -EBADF;
-
 	poll->head = NULL;
-	poll->woken = false;
+	poll->done = false;
 	poll->canceled = false;
 
 	ipt.pt._qproc = io_poll_queue_proc;
@@ -1346,56 +1290,44 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	INIT_LIST_HEAD(&poll->wait.entry);
 	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
 
-	/* one for removal from waitqueue, one for this function */
-	refcount_set(&req->refs, 2);
-
 	mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
-	if (unlikely(!poll->head)) {
-		/* we did not manage to set up a waitqueue, done */
-		goto out;
-	}
 
 	spin_lock_irq(&ctx->completion_lock);
-	spin_lock(&poll->head->lock);
-	if (poll->woken) {
-		/* wake_up context handles the rest */
-		mask = 0;
-		ipt.error = 0;
-	} else if (mask || ipt.error) {
-		/* if we get an error or a mask we are done */
-		WARN_ON_ONCE(list_empty(&poll->wait.entry));
-		list_del_init(&poll->wait.entry);
-	} else {
-		/* actually waiting for an event */
-		list_add_tail(&req->list, &ctx->cancel_list);
+	if (likely(poll->head)) {
+		spin_lock(&poll->head->lock);
+		if (unlikely(list_empty(&poll->wait.entry))) {
+			if (ipt.error)
+				cancel = true;
+			ipt.error = 0;
+			mask = 0;
+		}
+		if (mask || ipt.error)
+			list_del_init(&poll->wait.entry);
+		else if (cancel)
+			WRITE_ONCE(poll->canceled, true);
+		else if (!poll->done) /* actually waiting for an event */
+			list_add_tail(&req->list, &ctx->cancel_list);
+		spin_unlock(&poll->head->lock);
+	}
+	if (mask) { /* no async, we'd stolen it */
+		req->error = mangle_poll(mask);
+		ipt.error = 0;
+		io_poll_complete(ctx, req, mask);
 	}
-	spin_unlock(&poll->head->lock);
 	spin_unlock_irq(&ctx->completion_lock);
 
-out:
-	if (unlikely(ipt.error)) {
-		if (!(flags & IOSQE_FIXED_FILE))
-			fput(poll->file);
-		/*
-		 * Drop one of our refs to this req, __io_submit_sqe() will
-		 * drop the other one since we're returning an error.
-		 */
-		io_free_req(req);
-		return ipt.error;
+	if (mask) {
+		io_cqring_ev_posted(ctx);
+		io_put_req(req);
 	}
-
-	if (mask)
-		io_poll_complete(req, mask);
-	io_free_req(req);
-	return 0;
+	return ipt.error;
 }
 
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			   const struct sqe_submit *s, bool force_nonblock,
 			   struct io_submit_state *state)
 {
-	ssize_t ret;
-	int opcode;
+	int ret, opcode;
 
 	if (unlikely(s->index >= ctx->sq_entries))
 		return -EINVAL;
@@ -1524,10 +1456,13 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 				break;
 			cond_resched();
 		} while (1);
+
+		/* drop submission reference */
+		io_put_req(req);
 	}
 	if (ret) {
 		io_cqring_add_event(ctx, sqe->user_data, ret, 0);
-		io_free_req(req);
+		io_put_req(req);
 	}
 
 	/* async context always use a copy of the sqe */
@@ -1614,11 +1549,55 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
 	return ret;
 }
 
+static bool io_op_needs_file(const struct io_uring_sqe *sqe)
+{
+	int op = READ_ONCE(sqe->opcode);
+
+	switch (op) {
+	case IORING_OP_NOP:
+	case IORING_OP_POLL_REMOVE:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
+			   struct io_submit_state *state, struct io_kiocb *req)
+{
+	unsigned flags;
+	int fd;
+
+	flags = READ_ONCE(s->sqe->flags);
+	fd = READ_ONCE(s->sqe->fd);
+
+	if (!io_op_needs_file(s->sqe)) {
+		req->file = NULL;
+		return 0;
+	}
+
+	if (flags & IOSQE_FIXED_FILE) {
+		if (unlikely(!ctx->user_files ||
+		    (unsigned) fd >= ctx->nr_user_files))
+			return -EBADF;
+		req->file = ctx->user_files[fd];
+		req->flags |= REQ_F_FIXED_FILE;
+	} else {
+		if (s->needs_fixed_file)
+			return -EBADF;
+		req->file = io_file_get(state, fd);
+		if (unlikely(!req->file))
+			return -EBADF;
+	}
+
+	return 0;
+}
+
 static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 			 struct io_submit_state *state)
 {
 	struct io_kiocb *req;
-	ssize_t ret;
+	int ret;
 
 	/* enforce forwards compatibility on users */
 	if (unlikely(s->sqe->flags & ~IOSQE_FIXED_FILE))
@@ -1628,7 +1607,9 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 	if (unlikely(!req))
 		return -EAGAIN;
 
-	req->rw.ki_filp = NULL;
+	ret = io_req_set_file(ctx, s, state, req);
+	if (unlikely(ret))
+		goto out;
 
 	ret = __io_submit_sqe(ctx, req, s, true, state);
 	if (ret == -EAGAIN) {
@@ -1649,11 +1630,23 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 				INIT_WORK(&req->work, io_sq_wq_submit_work);
 				queue_work(ctx->sqo_wq, &req->work);
 			}
-			ret = 0;
+
+			/*
+			 * Queued up for async execution, worker will release
+			 * submit reference when the iocb is actually
+			 * submitted.
+			 */
+			return 0;
 		}
 	}
 
+out:
+	/* drop submission reference */
+	io_put_req(req);
+
+	/* and drop final reference, if we failed */
 	if (ret)
-		io_free_req(req);
+		io_put_req(req);
 
 	return ret;
 }
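
The io_uring hunks above drop the old "free unless references were taken"
special cases in favour of a plain two-reference lifetime: io_get_req() starts
every request with two references, the submission path always drops one (or
hands it to the async worker), and the completion path drops the other;
whoever releases the last reference frees the request and puts its non-fixed
file. A stripped-down sketch of that model, with made-up names, illustrative
only and not the kernel code itself:

	struct example_req {
		refcount_t	refs;
		struct file	*file;		/* NULL, or a reference we own */
		bool		fixed_file;	/* owned by the ring, don't fput */
	};

	static void example_free(struct example_req *req)
	{
		if (req->file && !req->fixed_file)
			fput(req->file);
		kfree(req);
	}

	static void example_put(struct example_req *req)
	{
		if (refcount_dec_and_test(&req->refs))
			example_free(req);
	}

	static struct example_req *example_get(void)
	{
		struct example_req *req = kzalloc(sizeof(*req), GFP_KERNEL);

		/* one reference for submission, one for completion */
		if (req)
			refcount_set(&req->refs, 2);
		return req;
	}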

 fs/iomap.c | 12
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1589,12 +1589,14 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 		if (should_dirty) {
 			bio_check_pages_dirty(bio);
 		} else {
-			struct bio_vec *bvec;
-			int i;
-			struct bvec_iter_all iter_all;
+			if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+				struct bvec_iter_all iter_all;
+				struct bio_vec *bvec;
+				int i;
 
-			bio_for_each_segment_all(bvec, bio, i, iter_all)
-				put_page(bvec->bv_page);
+				bio_for_each_segment_all(bvec, bio, i, iter_all)
+					put_page(bvec->bv_page);
+			}
 			bio_put(bio);
 		}
 	}

--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -215,6 +215,7 @@ struct bio {
 /*
  * bio flags
  */
+#define BIO_NO_PAGE_REF	0	/* don't put release vec pages */
 #define BIO_SEG_VALID	1	/* bi_phys_segments valid */
 #define BIO_CLONED	2	/* doesn't own data */
 #define BIO_BOUNCED	3	/* bio is a bounce bio */

--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -23,14 +23,23 @@ struct kvec {
 };
 
 enum iter_type {
-	ITER_IOVEC = 0,
-	ITER_KVEC = 2,
-	ITER_BVEC = 4,
-	ITER_PIPE = 8,
-	ITER_DISCARD = 16,
+	/* set if ITER_BVEC doesn't hold a bv_page ref */
+	ITER_BVEC_FLAG_NO_REF = 2,
+
+	/* iter types */
+	ITER_IOVEC = 4,
+	ITER_KVEC = 8,
+	ITER_BVEC = 16,
+	ITER_PIPE = 32,
+	ITER_DISCARD = 64,
 };
 
 struct iov_iter {
+	/*
+	 * Bit 0 is the read/write bit, set if we're writing.
+	 * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and
+	 * the caller isn't expecting to drop a page reference when done.
+	 */
 	unsigned int type;
 	size_t iov_offset;
 	size_t count;
@@ -84,6 +93,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 	return i->type & (READ | WRITE);
 }
 
+static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i)
+{
+	return (i->type & ITER_BVEC_FLAG_NO_REF) != 0;
+}
+
 /*
  * Total number of bytes covered by an iovec.
  *