for-4.21/aio-20181221
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAlwb7aEQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgptKREACow57782zzoaqert39bczau9nDfY83vd0r
u5ToCJstrN7esO0idJx5XLJHJjvbKlXa1I06shG3beX/kSnI+RYtMnKgBh1tzWmj
ywxJLBB3CkCP/0Kt8oAzgjiiGUXeZYWmEkytk8JLXyQQxIBHrxBkNu6+HwBmxkOp
kV06GGVR9l1cMUl6RF9pLdkRwMyZ7PPuNvPGjhbVDvHCIbRNeruxFU/a3TCqGRn/
oFRCBCEnaUZHIH0M9XeQFiCOXo4A1wE+CKM7ymMpAfLF0DGUo+EsAKFC6380eXLv
Haiv0rmzGAJJE3BLvkVOLz5UlatPzIBvhp8l//Jxv9wi6x9JDbv31K+FaE8DiTuj
dubhnhFdEo9HfcRPxBEysuMCho+56ZF2mw/kb0V0aRR9m3tRstrcyYXuVBM/PMpI
HQrklAS25J70WtdlnSGayvasNC2H/HURFkGG9+QooW7r4trs56fjeef3O/wZVDIh
oFnJVCOkcSo8O0bbh24LOeE6o1uGqlfUGCPM/Fv1qPJVwfGS41CfgilljsN0skCo
Q3Cv8DPDeGZf8i3Bi9nvddWP9x7p9qaRg8sSKxlOtCjMdr4OIwe7vINQpUDyygQF
ARqcRkfvHEdeN0KO38+6x6Q5YOchFMBwU311SkhVO4EQzto8WjDTJr2CR0Z7mKyC
mDLWR6vM/g==
=d3W9
-----END PGP SIGNATURE-----

Merge tag 'for-4.21/aio-20181221' of git://git.kernel.dk/linux-block

Pull aio updates from Jens Axboe:
 "Flushing out pre-patches for the buffered/polled aio series. Some
  fixes in here, but also optimizations"

* tag 'for-4.21/aio-20181221' of git://git.kernel.dk/linux-block:
  aio: abstract out io_event filler helper
  aio: split out iocb copy from io_submit_one()
  aio: use iocb_put() instead of open coding it
  aio: only use blk plugs for > 2 depth submissions
  aio: don't zero entire aio_kiocb aio_get_req()
  aio: separate out ring reservation from req allocation
  aio: use assigned completion handler
commit 956eb6cb36

fs/aio.c | 138

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -70,6 +70,12 @@ struct aio_ring {
 	struct io_event		io_events[0];
 }; /* 128 bytes + ring size */
 
+/*
+ * Plugging is meant to work with larger batches of IOs. If we don't
+ * have more than the below, then don't bother setting up a plug.
+ */
+#define AIO_PLUG_THRESHOLD	2
+
 #define AIO_RING_PAGES	8
 
 struct kioctx_table {
@@ -902,7 +908,7 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
 	local_irq_restore(flags);
 }
 
-static bool get_reqs_available(struct kioctx *ctx)
+static bool __get_reqs_available(struct kioctx *ctx)
 {
 	struct kioctx_cpu *kcpu;
 	bool ret = false;
@@ -994,6 +1000,14 @@ static void user_refill_reqs_available(struct kioctx *ctx)
 	spin_unlock_irq(&ctx->completion_lock);
 }
 
+static bool get_reqs_available(struct kioctx *ctx)
+{
+	if (__get_reqs_available(ctx))
+		return true;
+	user_refill_reqs_available(ctx);
+	return __get_reqs_available(ctx);
+}
+
 /* aio_get_req
  *	Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
@@ -1002,24 +1016,16 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 {
 	struct aio_kiocb *req;
 
-	if (!get_reqs_available(ctx)) {
-		user_refill_reqs_available(ctx);
-		if (!get_reqs_available(ctx))
-			return NULL;
-	}
-
-	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
+	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
 	if (unlikely(!req))
-		goto out_put;
+		return NULL;
 
 	percpu_ref_get(&ctx->reqs);
+	req->ki_ctx = ctx;
 	INIT_LIST_HEAD(&req->ki_list);
 	refcount_set(&req->ki_refcnt, 0);
-	req->ki_ctx = ctx;
+	req->ki_eventfd = NULL;
 	return req;
-out_put:
-	put_reqs_available(ctx, 1);
-	return NULL;
 }
 
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
@@ -1059,6 +1065,15 @@ static inline void iocb_put(struct aio_kiocb *iocb)
 	}
 }
 
+static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
+			   long res, long res2)
+{
+	ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
+	ev->data = iocb->ki_user_data;
+	ev->res = res;
+	ev->res2 = res2;
+}
+
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
@@ -1086,10 +1101,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
 	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-	event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
-	event->data = iocb->ki_user_data;
-	event->res = res;
-	event->res2 = res2;
+	aio_fill_event(event, iocb, res, res2);
 
 	kunmap_atomic(ev_page);
 	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
@@ -1416,7 +1428,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
 	aio_complete(iocb, res, res2);
 }
 
-static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
+static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 {
 	int ret;
 
@@ -1457,7 +1469,7 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
 	return ret;
 }
 
-static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
+static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
 		bool vectored, bool compat, struct iov_iter *iter)
 {
 	void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
@@ -1492,12 +1504,12 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
 		ret = -EINTR;
 		/*FALLTHRU*/
 	default:
-		aio_complete_rw(req, ret, 0);
+		req->ki_complete(req, ret, 0);
 	}
 }
 
-static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
-		bool compat)
+static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
+			bool vectored, bool compat)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iov_iter iter;
@@ -1529,8 +1541,8 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
 	return ret;
 }
 
-static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
-		bool compat)
+static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
+			 bool vectored, bool compat)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iov_iter iter;
@@ -1585,7 +1597,8 @@ static void aio_fsync_work(struct work_struct *work)
 	aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
 }
 
-static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
+static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
+		     bool datasync)
 {
 	if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
 			iocb->aio_rw_flags))
@@ -1713,7 +1726,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 	add_wait_queue(head, &pt->iocb->poll.wait);
 }
 
-static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 {
 	struct kioctx *ctx = aiocb->ki_ctx;
 	struct poll_iocb *req = &aiocb->poll;
@@ -1733,6 +1746,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
 	if (unlikely(!req->file))
 		return -EBADF;
 
+	req->head = NULL;
+	req->woken = false;
+	req->cancelled = false;
+
 	apt.pt._qproc = aio_poll_queue_proc;
 	apt.pt._key = req->events;
 	apt.iocb = aiocb;
@@ -1781,44 +1798,44 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
 	return 0;
 }
 
-static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 bool compat)
+static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
+			   struct iocb __user *user_iocb, bool compat)
 {
 	struct aio_kiocb *req;
-	struct iocb iocb;
 	ssize_t ret;
 
-	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
-		return -EFAULT;
-
 	/* enforce forwards compatibility on users */
-	if (unlikely(iocb.aio_reserved2)) {
+	if (unlikely(iocb->aio_reserved2)) {
 		pr_debug("EINVAL: reserve field set\n");
 		return -EINVAL;
 	}
 
 	/* prevent overflows */
 	if (unlikely(
-	    (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
-	    (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
-	    ((ssize_t)iocb.aio_nbytes < 0)
+	    (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
+	    (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
+	    ((ssize_t)iocb->aio_nbytes < 0)
 	   )) {
 		pr_debug("EINVAL: overflow check\n");
 		return -EINVAL;
 	}
 
-	req = aio_get_req(ctx);
-	if (unlikely(!req))
+	if (!get_reqs_available(ctx))
 		return -EAGAIN;
 
-	if (iocb.aio_flags & IOCB_FLAG_RESFD) {
+	ret = -EAGAIN;
+	req = aio_get_req(ctx);
+	if (unlikely(!req))
+		goto out_put_reqs_available;
+
+	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
 		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
 		 * instance of the file* now. The file descriptor must be
 		 * an eventfd() fd, and will be signaled for each completed
 		 * event using the eventfd_signal() function.
 		 */
-		req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd);
+		req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
 		if (IS_ERR(req->ki_eventfd)) {
 			ret = PTR_ERR(req->ki_eventfd);
 			req->ki_eventfd = NULL;
@@ -1833,32 +1850,32 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	}
 
 	req->ki_user_iocb = user_iocb;
-	req->ki_user_data = iocb.aio_data;
+	req->ki_user_data = iocb->aio_data;
 
-	switch (iocb.aio_lio_opcode) {
+	switch (iocb->aio_lio_opcode) {
 	case IOCB_CMD_PREAD:
-		ret = aio_read(&req->rw, &iocb, false, compat);
+		ret = aio_read(&req->rw, iocb, false, compat);
 		break;
 	case IOCB_CMD_PWRITE:
-		ret = aio_write(&req->rw, &iocb, false, compat);
+		ret = aio_write(&req->rw, iocb, false, compat);
 		break;
 	case IOCB_CMD_PREADV:
-		ret = aio_read(&req->rw, &iocb, true, compat);
+		ret = aio_read(&req->rw, iocb, true, compat);
 		break;
 	case IOCB_CMD_PWRITEV:
-		ret = aio_write(&req->rw, &iocb, true, compat);
+		ret = aio_write(&req->rw, iocb, true, compat);
 		break;
 	case IOCB_CMD_FSYNC:
-		ret = aio_fsync(&req->fsync, &iocb, false);
+		ret = aio_fsync(&req->fsync, iocb, false);
 		break;
 	case IOCB_CMD_FDSYNC:
-		ret = aio_fsync(&req->fsync, &iocb, true);
+		ret = aio_fsync(&req->fsync, iocb, true);
 		break;
 	case IOCB_CMD_POLL:
-		ret = aio_poll(req, &iocb);
+		ret = aio_poll(req, iocb);
 		break;
 	default:
-		pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
+		pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
 		ret = -EINVAL;
 		break;
 	}
@@ -1872,14 +1889,25 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 	return 0;
 out_put_req:
-	put_reqs_available(ctx, 1);
-	percpu_ref_put(&ctx->reqs);
 	if (req->ki_eventfd)
 		eventfd_ctx_put(req->ki_eventfd);
-	kmem_cache_free(kiocb_cachep, req);
+	iocb_put(req);
+out_put_reqs_available:
+	put_reqs_available(ctx, 1);
 	return ret;
 }
 
+static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
+			 bool compat)
+{
+	struct iocb iocb;
+
+	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+		return -EFAULT;
+
+	return __io_submit_one(ctx, &iocb, user_iocb, compat);
+}
+
 /* sys_io_submit:
  *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
  *	the number of iocbs queued.  May return -EINVAL if the aio_context
@@ -1912,6 +1940,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 	if (nr > ctx->nr_events)
 		nr = ctx->nr_events;
 
-	blk_start_plug(&plug);
+	if (nr > AIO_PLUG_THRESHOLD)
+		blk_start_plug(&plug);
 	for (i = 0; i < nr; i++) {
 		struct iocb __user *user_iocb;
@@ -1925,6 +1954,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 		if (ret)
 			break;
 	}
-	blk_finish_plug(&plug);
+	if (nr > AIO_PLUG_THRESHOLD)
+		blk_finish_plug(&plug);
 
 	percpu_ref_put(&ctx->users);
@@ -1952,6 +1982,7 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
 	if (nr > ctx->nr_events)
 		nr = ctx->nr_events;
 
-	blk_start_plug(&plug);
+	if (nr > AIO_PLUG_THRESHOLD)
+		blk_start_plug(&plug);
 	for (i = 0; i < nr; i++) {
 		compat_uptr_t user_iocb;
@@ -1965,6 +1996,7 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
 		if (ret)
 			break;
 	}
-	blk_finish_plug(&plug);
+	if (nr > AIO_PLUG_THRESHOLD)
+		blk_finish_plug(&plug);
 
 	percpu_ref_put(&ctx->users);