for-5.12/io_uring-2021-02-17
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmAtYbYQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgppeWD/4xKhzBCGZWOkdycaaPhsUTOjNNIPmCBhlz QQj4KFSEuJNKACUg53Ak0oECJTaH5976kjKkKs7Z+hzmkEwboLBI4erkcT9MGC3M mPx349qBq9X3sYaFrUJF3h0sjRr+wa60nWQ01oVH8HkfI4bCNCHoqo5jDvMPWsYT ksFbUm8YWEZmi0K2yXFWXuJIN2bVBd72a8CrvtF3ksdEMYxbWWTOAcrhYJ4H5/U7 BQjWIxiIVsAoJohcXWq/Swh8cgvgb5uJVpNUU8VEFob/jI3Gc3YojIToISB6soUL DNhDJLeyZjuXfE1Ej+ySas9bpdG4LgxzsDBl9lFl9EQkSo1c3h/lEx85aeixAZla QfjTOVUabzdPzvZ9H1yDQISxjVLy2PotnhVMy/rSSrnDKlowtNB9iEzd6cpzFzxU fxomz1d6+w8rZY9jaRIAcMNa6bEOuYmcP9V8rIzGeg3Mm3jqL7H/JgJu5s2YbjpN InmTNu4cwLeTO65DzqVxF8UGbZ2tHbMm5pNeVBYxuY1adRgJFlIOP5kYlNlyiY+D Bt41CRuK3hqpYfXh7nSK8U4BKEhMikTCS0W4aKL5EzLZ20rxjgTlaHZiOBqd9vep 1tqNjPIvL2jWfF+5shwAZbupj3WKbuVqi4S2jXljv+Wkmk4ZVLSX3fQZv2I7JTHM I2qa59PB4A== =8MX/ -----END PGP SIGNATURE----- Merge tag 'for-5.12/io_uring-2021-02-17' of git://git.kernel.dk/linux-block Pull io_uring updates from Jens Axboe: "Highlights from this cycle are things like request recycling and task_work optimizations, which net us anywhere from 10-20% of speedups on workloads that mostly are inline. This work was originally done to put io_uring under memcg, which adds considerable overhead. But it's a really nice win as well. Also worth highlighting is the LOOKUP_CACHED work in the VFS, and using it in io_uring. Greatly speeds up the fast path for file opens. Summary: - Put io_uring under memcg protection. We accounted just the rings themselves under rlimit memlock before, now we account everything. 
- Request cache recycling, persistent across invocations (Pavel, me) - First part of a cleanup/improvement to buffer registration (Bijan) - SQPOLL fixes (Hao) - File registration NULL pointer fixup (Dan) - LOOKUP_CACHED support for io_uring - Disable /proc/thread-self/ for io_uring, like we do for /proc/self - Add Pavel to the io_uring MAINTAINERS entry - Tons of code cleanups and optimizations (Pavel) - Support for skip entries in file registration (Noah)" * tag 'for-5.12/io_uring-2021-02-17' of git://git.kernel.dk/linux-block: (103 commits) io_uring: tctx->task_lock should be IRQ safe proc: don't allow async path resolution of /proc/thread-self components io_uring: kill cached requests from exiting task closing the ring io_uring: add helper to free all request caches io_uring: allow task match to be passed to io_req_cache_free() io-wq: clear out worker ->fs and ->files io_uring: optimise io_init_req() flags setting io_uring: clean io_req_find_next() fast check io_uring: don't check PF_EXITING from syscall io_uring: don't split out consume out of SQE get io_uring: save ctx put/get for task_work submit io_uring: don't duplicate io_req_task_queue() io_uring: optimise SQPOLL mm/files grabbing io_uring: optimise out unlikely link queue io_uring: take compl state from submit state io_uring: inline io_complete_rw_common() io_uring: move res check out of io_rw_reissue() io_uring: simplify iopoll reissuing io_uring: clean up io_req_free_batch_finish() io_uring: move submit side state closer in the ring ...
This commit is contained in:
commit
5bbb336ba7
|
@ -6830,6 +6830,9 @@ F: include/linux/fs.h
|
|||
F: include/linux/fs_types.h
|
||||
F: include/uapi/linux/fs.h
|
||||
F: include/uapi/linux/openat2.h
|
||||
X: fs/io-wq.c
|
||||
X: fs/io-wq.h
|
||||
X: fs/io_uring.c
|
||||
|
||||
FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
|
||||
M: Riku Voipio <riku.voipio@iki.fi>
|
||||
|
@ -9263,6 +9266,7 @@ F: include/uapi/linux/iommu.h
|
|||
|
||||
IO_URING
|
||||
M: Jens Axboe <axboe@kernel.dk>
|
||||
R: Pavel Begunkov <asml.silence@gmail.com>
|
||||
L: io-uring@vger.kernel.org
|
||||
S: Maintained
|
||||
T: git git://git.kernel.dk/linux-block
|
||||
|
@ -9270,6 +9274,7 @@ T: git git://git.kernel.dk/liburing
|
|||
F: fs/io-wq.c
|
||||
F: fs/io-wq.h
|
||||
F: fs/io_uring.c
|
||||
F: include/linux/io_uring.h
|
||||
F: include/uapi/linux/io_uring.h
|
||||
|
||||
IPMI SUBSYSTEM
|
||||
|
|
48
fs/file.c
48
fs/file.c
|
@ -22,6 +22,8 @@
|
|||
#include <linux/close_range.h>
|
||||
#include <net/sock.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
unsigned int sysctl_nr_open __read_mostly = 1024*1024;
|
||||
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
|
||||
/* our min() is unusable in constant expressions ;-/ */
|
||||
|
@ -731,6 +733,32 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* See close_fd_get_file() below, this variant assumes current->files->file_lock
|
||||
* is held.
|
||||
*/
|
||||
int __close_fd_get_file(unsigned int fd, struct file **res)
|
||||
{
|
||||
struct files_struct *files = current->files;
|
||||
struct file *file;
|
||||
struct fdtable *fdt;
|
||||
|
||||
fdt = files_fdtable(files);
|
||||
if (fd >= fdt->max_fds)
|
||||
goto out_err;
|
||||
file = fdt->fd[fd];
|
||||
if (!file)
|
||||
goto out_err;
|
||||
rcu_assign_pointer(fdt->fd[fd], NULL);
|
||||
__put_unused_fd(files, fd);
|
||||
get_file(file);
|
||||
*res = file;
|
||||
return 0;
|
||||
out_err:
|
||||
*res = NULL;
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/*
|
||||
* variant of close_fd that gets a ref on the file for later fput.
|
||||
* The caller must ensure that filp_close() is called on the file, and then
|
||||
|
@ -739,27 +767,13 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
|
|||
int close_fd_get_file(unsigned int fd, struct file **res)
|
||||
{
|
||||
struct files_struct *files = current->files;
|
||||
struct file *file;
|
||||
struct fdtable *fdt;
|
||||
int ret;
|
||||
|
||||
spin_lock(&files->file_lock);
|
||||
fdt = files_fdtable(files);
|
||||
if (fd >= fdt->max_fds)
|
||||
goto out_unlock;
|
||||
file = fdt->fd[fd];
|
||||
if (!file)
|
||||
goto out_unlock;
|
||||
rcu_assign_pointer(fdt->fd[fd], NULL);
|
||||
__put_unused_fd(files, fd);
|
||||
ret = __close_fd_get_file(fd, res);
|
||||
spin_unlock(&files->file_lock);
|
||||
get_file(file);
|
||||
*res = file;
|
||||
return 0;
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&files->file_lock);
|
||||
*res = NULL;
|
||||
return -ENOENT;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void do_close_on_exec(struct files_struct *files)
|
||||
|
|
|
@ -133,6 +133,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
|
|||
const char *, const struct open_flags *);
|
||||
extern struct open_how build_open_how(int flags, umode_t mode);
|
||||
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
|
||||
extern int __close_fd_get_file(unsigned int fd, struct file **res);
|
||||
|
||||
long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
|
||||
int chmod_common(const struct path *path, umode_t mode);
|
||||
|
|
31
fs/io-wq.c
31
fs/io-wq.c
|
@ -64,9 +64,7 @@ struct io_worker {
|
|||
#endif
|
||||
const struct cred *cur_creds;
|
||||
const struct cred *saved_creds;
|
||||
struct files_struct *restore_files;
|
||||
struct nsproxy *restore_nsproxy;
|
||||
struct fs_struct *restore_fs;
|
||||
};
|
||||
|
||||
#if BITS_PER_LONG == 64
|
||||
|
@ -156,19 +154,19 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
|
|||
worker->cur_creds = worker->saved_creds = NULL;
|
||||
}
|
||||
|
||||
if (current->files != worker->restore_files) {
|
||||
if (current->files) {
|
||||
__acquire(&wqe->lock);
|
||||
raw_spin_unlock_irq(&wqe->lock);
|
||||
dropped_lock = true;
|
||||
|
||||
task_lock(current);
|
||||
current->files = worker->restore_files;
|
||||
current->files = NULL;
|
||||
current->nsproxy = worker->restore_nsproxy;
|
||||
task_unlock(current);
|
||||
}
|
||||
|
||||
if (current->fs != worker->restore_fs)
|
||||
current->fs = worker->restore_fs;
|
||||
if (current->fs)
|
||||
current->fs = NULL;
|
||||
|
||||
/*
|
||||
* If we have an active mm, we need to drop the wq lock before unusing
|
||||
|
@ -329,11 +327,11 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
|
|||
allow_kernel_signal(SIGINT);
|
||||
|
||||
current->flags |= PF_IO_WORKER;
|
||||
current->fs = NULL;
|
||||
current->files = NULL;
|
||||
|
||||
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
|
||||
worker->restore_files = current->files;
|
||||
worker->restore_nsproxy = current->nsproxy;
|
||||
worker->restore_fs = current->fs;
|
||||
io_wqe_inc_running(wqe, worker);
|
||||
}
|
||||
|
||||
|
@ -555,23 +553,21 @@ static void io_worker_handle_work(struct io_worker *worker)
|
|||
|
||||
/* handle a whole dependent link */
|
||||
do {
|
||||
struct io_wq_work *old_work, *next_hashed, *linked;
|
||||
struct io_wq_work *next_hashed, *linked;
|
||||
unsigned int hash = io_get_work_hash(work);
|
||||
|
||||
next_hashed = wq_next_work(work);
|
||||
io_impersonate_work(worker, work);
|
||||
wq->do_work(work);
|
||||
io_assign_current_work(worker, NULL);
|
||||
|
||||
old_work = work;
|
||||
linked = wq->do_work(work);
|
||||
|
||||
linked = wq->free_work(work);
|
||||
work = next_hashed;
|
||||
if (!work && linked && !io_wq_is_hashed(linked)) {
|
||||
work = linked;
|
||||
linked = NULL;
|
||||
}
|
||||
io_assign_current_work(worker, work);
|
||||
wq->free_work(old_work);
|
||||
|
||||
if (linked)
|
||||
io_wqe_enqueue(wqe, linked);
|
||||
|
||||
|
@ -850,11 +846,9 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
|
|||
struct io_wq *wq = wqe->wq;
|
||||
|
||||
do {
|
||||
struct io_wq_work *old_work = work;
|
||||
|
||||
work->flags |= IO_WQ_WORK_CANCEL;
|
||||
work = wq->do_work(work);
|
||||
wq->free_work(old_work);
|
||||
wq->do_work(work);
|
||||
work = wq->free_work(work);
|
||||
} while (work);
|
||||
}
|
||||
|
||||
|
@ -944,7 +938,6 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
|
|||
*/
|
||||
spin_lock_irqsave(&worker->lock, flags);
|
||||
if (worker->cur_work &&
|
||||
!(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) &&
|
||||
match->fn(worker->cur_work, match->data)) {
|
||||
send_sig(SIGINT, worker->task, 1);
|
||||
match->nr_running++;
|
||||
|
|
14
fs/io-wq.h
14
fs/io-wq.h
|
@ -9,7 +9,6 @@ enum {
|
|||
IO_WQ_WORK_CANCEL = 1,
|
||||
IO_WQ_WORK_HASHED = 2,
|
||||
IO_WQ_WORK_UNBOUND = 4,
|
||||
IO_WQ_WORK_NO_CANCEL = 8,
|
||||
IO_WQ_WORK_CONCURRENT = 16,
|
||||
|
||||
IO_WQ_WORK_FILES = 32,
|
||||
|
@ -28,15 +27,6 @@ enum io_wq_cancel {
|
|||
IO_WQ_CANCEL_NOTFOUND, /* work not found */
|
||||
};
|
||||
|
||||
struct io_wq_work_node {
|
||||
struct io_wq_work_node *next;
|
||||
};
|
||||
|
||||
struct io_wq_work_list {
|
||||
struct io_wq_work_node *first;
|
||||
struct io_wq_work_node *last;
|
||||
};
|
||||
|
||||
static inline void wq_list_add_after(struct io_wq_work_node *node,
|
||||
struct io_wq_work_node *pos,
|
||||
struct io_wq_work_list *list)
|
||||
|
@ -107,8 +97,8 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
|
|||
return container_of(work->list.next, struct io_wq_work, list);
|
||||
}
|
||||
|
||||
typedef void (free_work_fn)(struct io_wq_work *);
|
||||
typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *);
|
||||
typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
|
||||
typedef void (io_wq_work_fn)(struct io_wq_work *);
|
||||
|
||||
struct io_wq_data {
|
||||
struct user_struct *user;
|
||||
|
|
2624
fs/io_uring.c
2624
fs/io_uring.c
File diff suppressed because it is too large
Load Diff
|
@ -20,7 +20,7 @@ static const char *proc_self_get_link(struct dentry *dentry,
|
|||
* Not currently supported. Once we can inherit all of struct pid,
|
||||
* we can allow this.
|
||||
*/
|
||||
if (current->flags & PF_KTHREAD)
|
||||
if (current->flags & PF_IO_WORKER)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
if (!tgid)
|
||||
|
|
|
@ -17,6 +17,13 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
|
|||
pid_t pid = task_pid_nr_ns(current, ns);
|
||||
char *name;
|
||||
|
||||
/*
|
||||
* Not currently supported. Once we can inherit all of struct pid,
|
||||
* we can allow this.
|
||||
*/
|
||||
if (current->flags & PF_IO_WORKER)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
if (!pid)
|
||||
return ERR_PTR(-ENOENT);
|
||||
name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC);
|
||||
|
|
|
@ -22,6 +22,15 @@ struct io_identity {
|
|||
refcount_t count;
|
||||
};
|
||||
|
||||
struct io_wq_work_node {
|
||||
struct io_wq_work_node *next;
|
||||
};
|
||||
|
||||
struct io_wq_work_list {
|
||||
struct io_wq_work_node *first;
|
||||
struct io_wq_work_node *last;
|
||||
};
|
||||
|
||||
struct io_uring_task {
|
||||
/* submission side */
|
||||
struct xarray xa;
|
||||
|
@ -32,6 +41,11 @@ struct io_uring_task {
|
|||
struct io_identity *identity;
|
||||
atomic_t in_idle;
|
||||
bool sqpoll;
|
||||
|
||||
spinlock_t task_lock;
|
||||
struct io_wq_work_list task_list;
|
||||
unsigned long task_state;
|
||||
struct callback_head task_work;
|
||||
};
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
|
|
|
@ -285,12 +285,22 @@ enum {
|
|||
IORING_REGISTER_LAST
|
||||
};
|
||||
|
||||
/* deprecated, see struct io_uring_rsrc_update */
|
||||
struct io_uring_files_update {
|
||||
__u32 offset;
|
||||
__u32 resv;
|
||||
__aligned_u64 /* __s32 * */ fds;
|
||||
};
|
||||
|
||||
struct io_uring_rsrc_update {
|
||||
__u32 offset;
|
||||
__u32 resv;
|
||||
__aligned_u64 data;
|
||||
};
|
||||
|
||||
/* Skip updating fd indexes set to this value in the fd table */
|
||||
#define IORING_REGISTER_FILES_SKIP (-2)
|
||||
|
||||
#define IO_URING_OP_SUPPORTED (1U << 0)
|
||||
|
||||
struct io_uring_probe_op {
|
||||
|
|
Loading…
Reference in New Issue