Commit 7a074e96 authored by Christoph Hellwig

aio: implement io_pgetevents

This is the io_getevents equivalent of ppoll/pselect. It allows signals and
aio completions to be mixed properly (especially with IOCB_CMD_POLL) by
atomically executing the following sequence:

	sigset_t origmask;

	pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
	ret = io_getevents(ctx, min_nr, nr, events, timeout);
	pthread_sigmask(SIG_SETMASK, &origmask, NULL);

Note that, unlike many other signal-related calls, we do not pass a sigmask
size, as that would get us to 7 arguments, which isn't easily supported
by the syscall infrastructure.  It seems a lot less painful to just add a
new syscall variant in the unlikely case we're going to increase the
sigset size.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
parent a3c0d439
...@@ -396,3 +396,4 @@ ...@@ -396,3 +396,4 @@
382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free 382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free
383 i386 statx sys_statx __ia32_sys_statx 383 i386 statx sys_statx __ia32_sys_statx
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents
...@@ -341,6 +341,7 @@ ...@@ -341,6 +341,7 @@
330 common pkey_alloc __x64_sys_pkey_alloc 330 common pkey_alloc __x64_sys_pkey_alloc
331 common pkey_free __x64_sys_pkey_free 331 common pkey_free __x64_sys_pkey_free
332 common statx __x64_sys_statx 332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
# #
# x32-specific system call numbers start at 512 to avoid cache impact # x32-specific system call numbers start at 512 to avoid cache impact
......
...@@ -1303,10 +1303,6 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, ...@@ -1303,10 +1303,6 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
wait_event_interruptible_hrtimeout(ctx->wait, wait_event_interruptible_hrtimeout(ctx->wait,
aio_read_events(ctx, min_nr, nr, event, &ret), aio_read_events(ctx, min_nr, nr, event, &ret),
until); until);
if (!ret && signal_pending(current))
ret = -EINTR;
return ret; return ret;
} }
...@@ -1921,13 +1917,60 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, ...@@ -1921,13 +1917,60 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
struct timespec __user *, timeout) struct timespec __user *, timeout)
{ {
struct timespec64 ts; struct timespec64 ts;
int ret;
if (timeout && unlikely(get_timespec64(&ts, timeout)))
return -EFAULT;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
if (!ret && signal_pending(current))
ret = -EINTR;
return ret;
}
SYSCALL_DEFINE6(io_pgetevents,
aio_context_t, ctx_id,
long, min_nr,
long, nr,
struct io_event __user *, events,
struct timespec __user *, timeout,
const struct __aio_sigset __user *, usig)
{
struct __aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 ts;
int ret;
if (timeout) { if (timeout && unlikely(get_timespec64(&ts, timeout)))
if (unlikely(get_timespec64(&ts, timeout)))
return -EFAULT; return -EFAULT;
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
if (ksig.sigmask) {
if (ksig.sigsetsize != sizeof(sigset_t))
return -EINVAL;
if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask)))
return -EFAULT;
sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
}
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
if (signal_pending(current)) {
if (ksig.sigmask) {
current->saved_sigmask = sigsaved;
set_restore_sigmask();
}
if (!ret)
ret = -ERESTARTNOHAND;
} else {
if (ksig.sigmask)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
} }
return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); return ret;
} }
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
...@@ -1938,13 +1981,64 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, ...@@ -1938,13 +1981,64 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
struct compat_timespec __user *, timeout) struct compat_timespec __user *, timeout)
{ {
struct timespec64 t; struct timespec64 t;
int ret;
if (timeout) { if (timeout && compat_get_timespec64(&t, timeout))
if (compat_get_timespec64(&t, timeout))
return -EFAULT; return -EFAULT;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
if (!ret && signal_pending(current))
ret = -EINTR;
return ret;
}
/*
 * Compat counterpart of struct __aio_sigset: the signal-mask argument block
 * that the compat io_pgetevents() entry point copies in from user space.
 *
 * NOTE(review): sigmask is declared as a native kernel-width pointer. On a
 * 64-bit kernel that presumably does not match the layout a 32-bit caller
 * passes, so copy_from_user(&ksig, usig, sizeof(ksig)) may misread both
 * fields. Consider compat_uptr_t here with compat_ptr() at the use site;
 * confirm against the compat ABI before changing.
 */
struct __compat_aio_sigset {
/* User pointer to the compat signal mask; NULL means no mask is installed. */
compat_sigset_t __user *sigmask;
/* Size of *sigmask in bytes; checked against sizeof(compat_sigset_t). */
compat_size_t sigsetsize;
};
COMPAT_SYSCALL_DEFINE6(io_pgetevents,
compat_aio_context_t, ctx_id,
compat_long_t, min_nr,
compat_long_t, nr,
struct io_event __user *, events,
struct compat_timespec __user *, timeout,
const struct __compat_aio_sigset __user *, usig)
{
struct __compat_aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 t;
int ret;
if (timeout && compat_get_timespec64(&t, timeout))
return -EFAULT;
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
if (ksig.sigmask) {
if (ksig.sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
if (get_compat_sigset(&ksigmask, ksig.sigmask))
return -EFAULT;
sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
} }
return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
if (signal_pending(current)) {
if (ksig.sigmask) {
current->saved_sigmask = sigsaved;
set_restore_sigmask();
}
if (!ret)
ret = -ERESTARTNOHAND;
} else {
if (ksig.sigmask)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
return ret;
} }
#endif #endif
...@@ -330,6 +330,7 @@ extern int put_compat_rusage(const struct rusage *, ...@@ -330,6 +330,7 @@ extern int put_compat_rusage(const struct rusage *,
struct compat_rusage __user *); struct compat_rusage __user *);
struct compat_siginfo; struct compat_siginfo;
struct __compat_aio_sigset;
struct compat_dirent { struct compat_dirent {
u32 d_ino; u32 d_ino;
...@@ -553,6 +554,12 @@ asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id, ...@@ -553,6 +554,12 @@ asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id,
compat_long_t nr, compat_long_t nr,
struct io_event __user *events, struct io_event __user *events,
struct compat_timespec __user *timeout); struct compat_timespec __user *timeout);
/*
 * Compat entry point for io_pgetevents(): same arguments as
 * compat_sys_io_getevents() plus usig, a user pointer to the signal-mask
 * block (struct __compat_aio_sigset) applied around the wait.
 */
asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
compat_long_t min_nr,
compat_long_t nr,
struct io_event __user *events,
struct compat_timespec __user *timeout,
const struct __compat_aio_sigset __user *usig);
/* fs/cookies.c */ /* fs/cookies.c */
asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t); asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t);
......
...@@ -290,6 +290,12 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id, ...@@ -290,6 +290,12 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id,
long nr, long nr,
struct io_event __user *events, struct io_event __user *events,
struct timespec __user *timeout); struct timespec __user *timeout);
/*
 * io_getevents() variant that additionally takes a signal-mask block to be
 * applied around the wait. @sig points into user space, so it carries the
 * __user annotation, matching the SYSCALL_DEFINE6(io_pgetevents, ...)
 * definition and the compat prototype.
 */
asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
				long min_nr,
				long nr,
				struct io_event __user *events,
				struct timespec __user *timeout,
				const struct __aio_sigset __user *sig);
/* fs/xattr.c */ /* fs/xattr.c */
asmlinkage long sys_setxattr(const char __user *path, const char __user *name, asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
......
...@@ -732,9 +732,11 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) ...@@ -732,9 +732,11 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
__SYSCALL(__NR_pkey_free, sys_pkey_free) __SYSCALL(__NR_pkey_free, sys_pkey_free)
#define __NR_statx 291 #define __NR_statx 291
__SYSCALL(__NR_statx, sys_statx) __SYSCALL(__NR_statx, sys_statx)
#define __NR_io_pgetevents 292
__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
#undef __NR_syscalls #undef __NR_syscalls
#define __NR_syscalls 292 #define __NR_syscalls 293
/* /*
* 32 bit systems traditionally used different * 32 bit systems traditionally used different
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/signal.h>
#include <asm/byteorder.h> #include <asm/byteorder.h>
typedef __kernel_ulong_t aio_context_t; typedef __kernel_ulong_t aio_context_t;
...@@ -108,5 +109,10 @@ struct iocb { ...@@ -108,5 +109,10 @@ struct iocb {
#undef IFBIG #undef IFBIG
#undef IFLITTLE #undef IFLITTLE
/*
 * Signal-mask argument block for io_pgetevents(). User space passes a
 * pointer to one of these as the final syscall argument.
 */
struct __aio_sigset {
/* User pointer to the mask to install for the wait; NULL leaves the mask unchanged. */
sigset_t __user *sigmask;
/*
 * Size of *sigmask in bytes. When sigmask is non-NULL this must equal
 * sizeof(sigset_t), otherwise the call fails with -EINVAL.
 */
size_t sigsetsize;
};
#endif /* __LINUX__AIO_ABI_H */ #endif /* __LINUX__AIO_ABI_H */
...@@ -43,7 +43,9 @@ COND_SYSCALL(io_submit); ...@@ -43,7 +43,9 @@ COND_SYSCALL(io_submit);
COND_SYSCALL_COMPAT(io_submit); COND_SYSCALL_COMPAT(io_submit);
COND_SYSCALL(io_cancel); COND_SYSCALL(io_cancel);
COND_SYSCALL(io_getevents); COND_SYSCALL(io_getevents);
COND_SYSCALL(io_pgetevents);
COND_SYSCALL_COMPAT(io_getevents); COND_SYSCALL_COMPAT(io_getevents);
COND_SYSCALL_COMPAT(io_pgetevents);
/* fs/xattr.c */ /* fs/xattr.c */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment