aio: implement io_pgetevents

This is the io_getevents equivalent of ppoll/pselect. It allows signals
and aio completions to be mixed properly (especially with IOCB_CMD_POLL)
by atomically executing the following sequence:

	sigset_t origmask;

	pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
	ret = io_getevents(ctx, min_nr, nr, events, timeout);
	pthread_sigmask(SIG_SETMASK, &origmask, NULL);

Note that unlike many other signal related calls we do not pass a sigmask
size, as that would get us to 7 arguments, which aren't easily supported
by the syscall infrastructure.  It seems a lot less painful to just add a
new syscall variant in the unlikely case we're going to increase the
sigset size.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
This commit is contained in:
Christoph Hellwig 2018-05-02 19:51:00 +02:00
parent a3c0d439e4
commit 7a074e96de
8 changed files with 131 additions and 12 deletions
arch/x86/entry/syscalls
fs
include
linux
uapi
asm-generic
linux
kernel

View File

@ -396,3 +396,4 @@
382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free
383 i386 statx sys_statx __ia32_sys_statx
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents

View File

@ -341,6 +341,7 @@
330 common pkey_alloc __x64_sys_pkey_alloc
331 common pkey_free __x64_sys_pkey_free
332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
#
# x32-specific system call numbers start at 512 to avoid cache impact

114
fs/aio.c
View File

@ -1303,10 +1303,6 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
wait_event_interruptible_hrtimeout(ctx->wait,
aio_read_events(ctx, min_nr, nr, event, &ret),
until);
if (!ret && signal_pending(current))
ret = -EINTR;
return ret;
}
@ -1921,13 +1917,60 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
struct timespec __user *, timeout)
{
struct timespec64 ts;
int ret;
if (timeout) {
if (unlikely(get_timespec64(&ts, timeout)))
if (timeout && unlikely(get_timespec64(&ts, timeout)))
return -EFAULT;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
if (!ret && signal_pending(current))
ret = -EINTR;
return ret;
}
/*
 * io_pgetevents - io_getevents variant that can install a caller-supplied
 * signal mask around the wait.
 *
 * @ctx_id, @min_nr, @nr, @events: forwarded unchanged to do_io_getevents().
 * @timeout: optional user-space timespec; NULL is forwarded as a NULL timeout.
 * @usig:    optional user-space struct __aio_sigset; when it is NULL, or its
 *           sigmask member is NULL, the signal mask is left untouched.
 *
 * Returns -EFAULT if any user-space copy fails, -EINVAL if the supplied
 * sigsetsize is not sizeof(sigset_t), otherwise the do_io_getevents() result,
 * with a zero result turned into -ERESTARTNOHAND when a signal is pending.
 */
SYSCALL_DEFINE6(io_pgetevents,
aio_context_t, ctx_id,
long, min_nr,
long, nr,
struct io_event __user *, events,
struct timespec __user *, timeout,
const struct __aio_sigset __user *, usig)
{
/* Zero-initialised so ksig.sigmask stays NULL when usig is not given. */
struct __aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 ts;
int ret;
if (timeout && unlikely(get_timespec64(&ts, timeout)))
return -EFAULT;
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
if (ksig.sigmask) {
/* Only the native sigset size is accepted. */
if (ksig.sigsetsize != sizeof(sigset_t))
return -EINVAL;
if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask)))
return -EFAULT;
/* SIGKILL and SIGSTOP are removed from the requested mask before it is applied. */
sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
/* Install the temporary mask and remember the previous one in sigsaved. */
sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
}
/*
 * NOTE(review): this unconditional return precedes the "ret =" call below
 * and would make the mask-restore logic unreachable. It looks like a removed
 * diff line shown next to its replacement -- confirm against the applied tree.
 */
return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
if (signal_pending(current)) {
if (ksig.sigmask) {
/*
 * A signal is pending: do not restore the old mask here. Stash it in
 * current->saved_sigmask and flag it for restoring later -- presumably
 * so the signal is delivered under the temporary mask; TODO confirm
 * against set_restore_sigmask().
 */
current->saved_sigmask = sigsaved;
set_restore_sigmask();
}
/* No events collected: report the interruption instead of 0. */
if (!ret)
ret = -ERESTARTNOHAND;
} else {
/* No signal pending: put the caller's original mask back right away. */
if (ksig.sigmask)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
return ret;
}
#ifdef CONFIG_COMPAT
@ -1938,13 +1981,64 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
struct compat_timespec __user *, timeout)
{
struct timespec64 t;
int ret;
if (timeout) {
if (compat_get_timespec64(&t, timeout))
if (timeout && compat_get_timespec64(&t, timeout))
return -EFAULT;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
if (!ret && signal_pending(current))
ret = -EINTR;
return ret;
}
/*
 * Compat counterpart of struct __aio_sigset, as copied from user space by the
 * compat io_pgetevents handler: a user pointer to a compat_sigset_t and the
 * size of that mask, which the handler requires to be sizeof(compat_sigset_t).
 */
struct __compat_aio_sigset {
compat_sigset_t __user *sigmask;
compat_size_t sigsetsize;
};
/*
 * Compat entry point for io_pgetevents.
 *
 * Same flow as the native handler, but the timeout is read with
 * compat_get_timespec64() and the signal mask is read from a
 * struct __compat_aio_sigset via get_compat_sigset().
 *
 * Returns -EFAULT if any user-space copy fails, -EINVAL if sigsetsize is not
 * sizeof(compat_sigset_t), otherwise the do_io_getevents() result, with a zero
 * result turned into -ERESTARTNOHAND when a signal is pending.
 */
COMPAT_SYSCALL_DEFINE6(io_pgetevents,
compat_aio_context_t, ctx_id,
compat_long_t, min_nr,
compat_long_t, nr,
struct io_event __user *, events,
struct compat_timespec __user *, timeout,
const struct __compat_aio_sigset __user *, usig)
{
/* Zero-initialised so ksig.sigmask stays NULL when usig is not given. */
struct __compat_aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 t;
int ret;
if (timeout && compat_get_timespec64(&t, timeout))
return -EFAULT;
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
if (ksig.sigmask) {
/* Only the compat sigset size is accepted. */
if (ksig.sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
/* Fill the native sigset_t from the user-supplied compat mask. */
if (get_compat_sigset(&ksigmask, ksig.sigmask))
return -EFAULT;
/* SIGKILL and SIGSTOP are removed from the requested mask before it is applied. */
sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
/* Install the temporary mask and remember the previous one in sigsaved. */
sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
}
/*
 * NOTE(review): this unconditional return precedes the "ret =" call below
 * and would make the mask-restore logic unreachable. It looks like a removed
 * diff line shown next to its replacement -- confirm against the applied tree.
 */
return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
if (signal_pending(current)) {
if (ksig.sigmask) {
/*
 * A signal is pending: keep the temporary mask for now and record the
 * original one in current->saved_sigmask for a deferred restore
 * (exact restore point depends on set_restore_sigmask(); TODO confirm).
 */
current->saved_sigmask = sigsaved;
set_restore_sigmask();
}
/* No events collected: report the interruption instead of 0. */
if (!ret)
ret = -ERESTARTNOHAND;
} else {
/* No signal pending: put the caller's original mask back right away. */
if (ksig.sigmask)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
return ret;
}
#endif

View File

@ -330,6 +330,7 @@ extern int put_compat_rusage(const struct rusage *,
struct compat_rusage __user *);
struct compat_siginfo;
struct __compat_aio_sigset;
struct compat_dirent {
u32 d_ino;
@ -553,6 +554,12 @@ asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id,
compat_long_t nr,
struct io_event __user *events,
struct compat_timespec __user *timeout);
asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
compat_long_t min_nr,
compat_long_t nr,
struct io_event __user *events,
struct compat_timespec __user *timeout,
const struct __compat_aio_sigset __user *usig);
/* fs/cookies.c */
asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t);

View File

@ -290,6 +290,12 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id,
long nr,
struct io_event __user *events,
struct timespec __user *timeout);
/*
 * io_pgetevents: like io_getevents, with an optional signal mask to apply
 * during the wait. @sig points to a user-space struct __aio_sigset, so it
 * carries the __user annotation, matching the syscall definition and the
 * compat prototype.
 */
asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
long min_nr,
long nr,
struct io_event __user *events,
struct timespec __user *timeout,
const struct __aio_sigset __user *sig);
/* fs/xattr.c */
asmlinkage long sys_setxattr(const char __user *path, const char __user *name,

View File

@ -732,9 +732,11 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
__SYSCALL(__NR_pkey_free, sys_pkey_free)
#define __NR_statx 291
__SYSCALL(__NR_statx, sys_statx)
#define __NR_io_pgetevents 292
__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
#undef __NR_syscalls
#define __NR_syscalls 292
#define __NR_syscalls 293
/*
* 32 bit systems traditionally used different

View File

@ -29,6 +29,7 @@
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/signal.h>
#include <asm/byteorder.h>
typedef __kernel_ulong_t aio_context_t;
@ -108,5 +109,10 @@ struct iocb {
#undef IFBIG
#undef IFLITTLE
/*
 * Signal-mask argument block for io_pgetevents: a user-space pointer to the
 * mask to apply during the wait, and the size of that mask in bytes. The
 * native handler rejects any sigsetsize other than sizeof(sigset_t) with
 * -EINVAL and treats a NULL sigmask as "leave the signal mask unchanged".
 */
struct __aio_sigset {
sigset_t __user *sigmask;
size_t sigsetsize;
};
#endif /* __LINUX__AIO_ABI_H */

View File

@ -43,7 +43,9 @@ COND_SYSCALL(io_submit);
COND_SYSCALL_COMPAT(io_submit);
COND_SYSCALL(io_cancel);
COND_SYSCALL(io_getevents);
COND_SYSCALL(io_pgetevents);
COND_SYSCALL_COMPAT(io_getevents);
COND_SYSCALL_COMPAT(io_pgetevents);
/* fs/xattr.c */