for-5.11/io_uring-2020-12-14

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl/XeDUQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpnF9D/4+l1r1G5AcsSsgEvu1aCjP83LLWrHIAA5+
 ca3OY6vwOjBvqI7oOoPcYJeYJ9uuGGQc31tDFJtP6Sl6Gk31AB4iSddyrowaX+t+
 UJyJNfsgWKiLjY48EyQJ0gIqjuvPq8hPGMGClJb1A7+w87fqBC5UwCWEnJmE7MaX
 401kIw0CRVWYTnDEOYxToss6D6gQ30E8UZjdJ0cG4g8xVQBY2kKwYR3F9tDlAwsY
 CF+RCKpibcKwnaNZJBL67ClWjj1hC0ivg0O0G+W1UYysesKKdWFRI2rmxvH55K5T
 7tHlfVuVPladNmlLVNZnCvyqBrFHyAZPmOsdv3xQOvJ7pZPaxKV9xIYryQKZW4H4
 9tKkj3T1aop/fDGqIMxgymZsWW+1vvxAmM+7WkdOPHwHRSakJ5wGIj6Ekpton+5y
 aixJUFq390o/o+S8PDO7mgzdvYrasv3iLl5UxnIcU3rq30wxnRKit4vUZny8DlzF
 gOTw7QSocximhGYci+Uz4d4/XdK2CHc6eZDkQDltgJXxIrdsrN0qKxMCEsMKgCR1
 RMiDv+52MP6kp/wpXiOHQF25YRnUOW0qfEjWKK6Ye28DGuKPPuIXtN/BUD3rjdIc
 IJX3lDfOI3PgXNX24nOarucrF+ootyRmE6tGTVZhCVBhUXGR+MGatGfkeCqnmNzZ
 gny2+UrGIQ==
 =ly9V
 -----END PGP SIGNATURE-----

Merge tag 'for-5.11/io_uring-2020-12-14' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "Fairly light set of changes this time around, and mostly some bits
  that were pushed out to 5.11 instead of 5.10, fixes/cleanups, and a
  few features. In particular:

   - Cleanups around iovec import (David Laight, Pavel)

   - Add timeout support for io_uring_enter(2), which enables us to
     clean up liburing and avoid a timeout sqe submission in the
     completion path.

     The big win here is that it allows setups that split SQ and CQ
     handling into separate threads to avoid locking, as the CQ side
     will no longer submit when timeouts are needed when waiting for
     events (Hao Xu)

   - Add support for socket shutdown, and renameat/unlinkat.

   - SQPOLL cleanups and improvements (Xiaoguang Wang)

   - Allow SQPOLL setups for CAP_SYS_NICE, and enable regular
     (non-fixed) files to be used.

   - Cancelation improvements (Pavel)

   - Fixed file reference improvements (Pavel)

   - IOPOLL related race fixes (Pavel)

   - Lots of other little fixes and cleanups (mostly Pavel)"

* tag 'for-5.11/io_uring-2020-12-14' of git://git.kernel.dk/linux-block: (43 commits)
  io_uring: fix io_cqring_events()'s noflush
  io_uring: fix racy IOPOLL flush overflow
  io_uring: fix racy IOPOLL completions
  io_uring: always let io_iopoll_complete() complete polled io
  io_uring: add timeout update
  io_uring: restructure io_timeout_cancel()
  io_uring: fix files cancellation
  io_uring: use bottom half safe lock for fixed file data
  io_uring: fix miscounting ios_left
  io_uring: change submit file state invariant
  io_uring: check kthread stopped flag when sq thread is unparked
  io_uring: share fixed_file_refs b/w multiple rsrcs
  io_uring: replace inflight_wait with tctx->wait
  io_uring: don't take fs for recvmsg/sendmsg
  io_uring: only wake up sq thread while current task is in io worker context
  io_uring: don't acquire uring_lock twice
  io_uring: initialize 'timeout' properly in io_sq_thread()
  io_uring: refactor io_sq_thread() handling
  io_uring: always batch cancel in *cancel_files()
  io_uring: pass files into kill timeouts/poll
  ...
This commit is contained in:
Linus Torvalds 2020-12-16 12:44:05 -08:00
commit 48aba79bcf
9 changed files with 829 additions and 595 deletions

View File

@ -78,6 +78,8 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
long do_rmdir(int dfd, struct filename *name);
long do_unlinkat(int dfd, struct filename *name);
int may_linkat(struct path *link);
int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
struct filename *newname, unsigned int flags);
/*
* namespace.c

View File

@ -1078,16 +1078,6 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
return IO_WQ_CANCEL_NOTFOUND;
}
static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data)
{
return work == data;
}
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
{
return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork, false);
}
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
{
int ret = -ENOMEM, node;

View File

@ -129,7 +129,6 @@ static inline bool io_wq_is_hashed(struct io_wq_work *work)
}
void io_wq_cancel_all(struct io_wq *wq);
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
typedef bool (work_cancel_fn)(struct io_wq_work *, void *);

File diff suppressed because it is too large Load Diff

View File

@ -4346,8 +4346,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
EXPORT_SYMBOL(vfs_rename);
static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
const char __user *newname, unsigned int flags)
int do_renameat2(int olddfd, struct filename *from, int newdfd,
struct filename *to, unsigned int flags)
{
struct dentry *old_dentry, *new_dentry;
struct dentry *trap;
@ -4355,32 +4355,30 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
struct qstr old_last, new_last;
int old_type, new_type;
struct inode *delegated_inode = NULL;
struct filename *from;
struct filename *to;
unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
bool should_retry = false;
int error;
int error = -EINVAL;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
goto put_both;
if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
(flags & RENAME_EXCHANGE))
return -EINVAL;
goto put_both;
if (flags & RENAME_EXCHANGE)
target_flags = 0;
retry:
from = filename_parentat(olddfd, getname(oldname), lookup_flags,
&old_path, &old_last, &old_type);
from = filename_parentat(olddfd, from, lookup_flags, &old_path,
&old_last, &old_type);
if (IS_ERR(from)) {
error = PTR_ERR(from);
goto exit;
goto put_new;
}
to = filename_parentat(newdfd, getname(newname), lookup_flags,
&new_path, &new_last, &new_type);
to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
&new_type);
if (IS_ERR(to)) {
error = PTR_ERR(to);
goto exit1;
@ -4473,34 +4471,40 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
if (retry_estale(error, lookup_flags))
should_retry = true;
path_put(&new_path);
putname(to);
exit1:
path_put(&old_path);
putname(from);
if (should_retry) {
should_retry = false;
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
exit:
put_both:
if (!IS_ERR(from))
putname(from);
put_new:
if (!IS_ERR(to))
putname(to);
return error;
}
SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname, unsigned int, flags)
{
return do_renameat2(olddfd, oldname, newdfd, newname, flags);
return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
flags);
}
SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
int, newdfd, const char __user *, newname)
{
return do_renameat2(olddfd, oldname, newdfd, newname, 0);
return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
0);
}
SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
{
return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD,
getname(newname), 0);
}
int readlink_copy(char __user *buffer, int buflen, const char *link)

View File

@ -436,6 +436,7 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
int __user *usockaddr_len);
extern int __sys_socketpair(int family, int type, int protocol,
int __user *usockvec);
extern int __sys_shutdown_sock(struct socket *sock, int how);
extern int __sys_shutdown(int fd, int how);
extern struct ns_common *get_net_ns(struct ns_common *ns);

View File

@ -317,7 +317,7 @@ asmlinkage long sys_io_uring_setup(u32 entries,
struct io_uring_params __user *p);
asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
u32 min_complete, u32 flags,
const sigset_t __user *sig, size_t sigsz);
const void __user *argp, size_t argsz);
asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
void __user *arg, unsigned int nr_args);

View File

@ -42,6 +42,8 @@ struct io_uring_sqe {
__u32 statx_flags;
__u32 fadvise_advice;
__u32 splice_flags;
__u32 rename_flags;
__u32 unlink_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
@ -132,6 +134,9 @@ enum {
IORING_OP_PROVIDE_BUFFERS,
IORING_OP_REMOVE_BUFFERS,
IORING_OP_TEE,
IORING_OP_SHUTDOWN,
IORING_OP_RENAMEAT,
IORING_OP_UNLINKAT,
/* this goes last, obviously */
IORING_OP_LAST,
@ -146,6 +151,7 @@ enum {
* sqe->timeout_flags
*/
#define IORING_TIMEOUT_ABS (1U << 0)
#define IORING_TIMEOUT_UPDATE (1U << 1)
/*
* sqe->splice_flags
@ -226,6 +232,7 @@ struct io_cqring_offsets {
#define IORING_ENTER_GETEVENTS (1U << 0)
#define IORING_ENTER_SQ_WAKEUP (1U << 1)
#define IORING_ENTER_SQ_WAIT (1U << 2)
#define IORING_ENTER_EXT_ARG (1U << 3)
/*
* Passed in for io_uring_setup(2). Copied back with updated info on success
@ -253,6 +260,8 @@ struct io_uring_params {
#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
#define IORING_FEAT_FAST_POLL (1U << 5)
#define IORING_FEAT_POLL_32BITS (1U << 6)
#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
#define IORING_FEAT_EXT_ARG (1U << 8)
/*
* io_uring_register(2) opcodes and arguments
@ -329,4 +338,11 @@ enum {
IORING_RESTRICTION_LAST
};
struct io_uring_getevents_arg {
__u64 sigmask;
__u32 sigmask_sz;
__u32 pad;
__u64 ts;
};
#endif

View File

@ -2175,6 +2175,17 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
* Shutdown a socket.
*/
int __sys_shutdown_sock(struct socket *sock, int how)
{
int err;
err = security_socket_shutdown(sock, how);
if (!err)
err = sock->ops->shutdown(sock, how);
return err;
}
int __sys_shutdown(int fd, int how)
{
int err, fput_needed;
@ -2182,9 +2193,7 @@ int __sys_shutdown(int fd, int how)
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
err = security_socket_shutdown(sock, how);
if (!err)
err = sock->ops->shutdown(sock, how);
err = __sys_shutdown_sock(sock, how);
fput_light(sock->file, fput_needed);
}
return err;