Commit b94ae8ad authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'seccomp-v5.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp updates from Kees Cook:
 "Mostly this is implementing the new flag SECCOMP_USER_NOTIF_FLAG_CONTINUE,
  but there are cleanups as well.

   - implement SECCOMP_USER_NOTIF_FLAG_CONTINUE (Christian Brauner)

   - fixes to selftests (Christian Brauner)

   - remove secure_computing() argument (Christian Brauner)"

* tag 'seccomp-v5.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  seccomp: rework define for SECCOMP_USER_NOTIF_FLAG_CONTINUE
  seccomp: fix SECCOMP_USER_NOTIF_FLAG_CONTINUE test
  seccomp: simplify secure_computing()
  seccomp: test SECCOMP_USER_NOTIF_FLAG_CONTINUE
  seccomp: add SECCOMP_USER_NOTIF_FLAG_CONTINUE
  seccomp: avoid overflow in implicit constant conversion
parents 3b805ca1 23b2c96f
......@@ -923,7 +923,7 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
/* Do seccomp after ptrace; syscall may have changed. */
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
if (secure_computing(NULL) == -1)
if (secure_computing() == -1)
return -1;
#else
/* XXX: remove this once OABI gets fixed */
......
......@@ -1816,7 +1816,7 @@ int syscall_trace_enter(struct pt_regs *regs)
}
/* Do the secure computing after ptrace; failures should be fast. */
if (secure_computing(NULL) == -1)
if (secure_computing() == -1)
return -1;
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
......
......@@ -342,7 +342,7 @@ long do_syscall_trace_enter(struct pt_regs *regs)
}
/* Do the secure computing check after ptrace. */
if (secure_computing(NULL) == -1)
if (secure_computing() == -1)
return -1;
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
......
......@@ -159,7 +159,7 @@ __visible void do_syscall_trace_enter(struct pt_regs *regs)
* If this fails we might have return value in a0 from seccomp
* (via SECCOMP_RET_ERRNO/TRACE).
*/
if (secure_computing(NULL) == -1) {
if (secure_computing() == -1) {
syscall_set_nr(current, regs, -1);
return;
}
......
......@@ -856,7 +856,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
}
/* Do the secure computing check after ptrace. */
if (secure_computing(NULL)) {
if (secure_computing()) {
/* seccomp failures shouldn't expose any additional code. */
return -1;
}
......
......@@ -35,7 +35,7 @@ void handle_syscall(struct uml_pt_regs *r)
goto out;
/* Do the seccomp check after ptrace; failures should be fast. */
if (secure_computing(NULL) == -1)
if (secure_computing() == -1)
goto out;
syscall = UPT_SYSCALL_NR(r);
......
......@@ -222,7 +222,7 @@ bool emulate_vsyscall(unsigned long error_code,
*/
regs->orig_ax = syscall_nr;
regs->ax = -ENOSYS;
tmp = secure_computing(NULL);
tmp = secure_computing();
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
warn_bad_vsyscall(KERN_DEBUG, regs,
"seccomp tried to change syscall nr or ip");
......
......@@ -33,10 +33,10 @@ struct seccomp {
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
extern int __secure_computing(const struct seccomp_data *sd);
static inline int secure_computing(const struct seccomp_data *sd)
static inline int secure_computing(void)
{
if (unlikely(test_thread_flag(TIF_SECCOMP)))
return __secure_computing(sd);
return __secure_computing(NULL);
return 0;
}
#else
......@@ -59,7 +59,7 @@ struct seccomp { };
struct seccomp_filter { };
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
static inline int secure_computing(struct seccomp_data *sd) { return 0; }
static inline int secure_computing(void) { return 0; }
#else
static inline void secure_computing_strict(int this_syscall) { return; }
#endif
......
......@@ -76,6 +76,35 @@ struct seccomp_notif {
struct seccomp_data data;
};
/*
* Valid flags for struct seccomp_notif_resp
*
* Note, the SECCOMP_USER_NOTIF_FLAG_CONTINUE flag must be used with caution!
* If set by the process supervising the syscalls of another process the
* syscall will continue. This is problematic because of an inherent TOCTOU.
* An attacker can exploit the time while the supervised process is waiting on
* a response from the supervising process to rewrite syscall arguments which
* are passed as pointers of the intercepted syscall.
* It should be absolutely clear that this means that the seccomp notifier
* _cannot_ be used to implement a security policy! It should only ever be used
* in scenarios where a more privileged process supervises the syscalls of a
* lesser privileged process to get around kernel-enforced security
* restrictions when the privileged process deems this safe. In other words,
* in order to continue a syscall the supervising process should be sure that
* another security mechanism or the kernel itself will sufficiently block
* syscalls if arguments are rewritten to something unsafe.
*
* Similar precautions should be applied when stacking SECCOMP_RET_USER_NOTIF
* or SECCOMP_RET_TRACE. For SECCOMP_RET_USER_NOTIF filters acting on the
* same syscall, the most recently added filter takes precedence. This means
* that the new SECCOMP_RET_USER_NOTIF filter can override any
* SECCOMP_IOCTL_NOTIF_SEND from earlier filters, essentially allowing all
* such filtered syscalls to be executed by sending the response
* SECCOMP_USER_NOTIF_FLAG_CONTINUE. Note that SECCOMP_RET_TRACE can equally
* be overriden by SECCOMP_USER_NOTIF_FLAG_CONTINUE.
*/
#define SECCOMP_USER_NOTIF_FLAG_CONTINUE (1UL << 0)
struct seccomp_notif_resp {
__u64 id;
__s64 val;
......
......@@ -75,6 +75,7 @@ struct seccomp_knotif {
/* The return values, only valid when in SECCOMP_NOTIFY_REPLIED */
int error;
long val;
u32 flags;
/* Signals when this has entered SECCOMP_NOTIFY_REPLIED */
struct completion ready;
......@@ -732,11 +733,12 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
return filter->notif->next_id++;
}
static void seccomp_do_user_notification(int this_syscall,
struct seccomp_filter *match,
const struct seccomp_data *sd)
static int seccomp_do_user_notification(int this_syscall,
struct seccomp_filter *match,
const struct seccomp_data *sd)
{
int err;
u32 flags = 0;
long ret = 0;
struct seccomp_knotif n = {};
......@@ -764,6 +766,7 @@ static void seccomp_do_user_notification(int this_syscall,
if (err == 0) {
ret = n.val;
err = n.error;
flags = n.flags;
}
/*
......@@ -780,8 +783,14 @@ static void seccomp_do_user_notification(int this_syscall,
list_del(&n.list);
out:
mutex_unlock(&match->notify_lock);
/* Userspace requests to continue the syscall. */
if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
return 0;
syscall_set_return_value(current, task_pt_regs(current),
err, ret);
return -1;
}
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
......@@ -867,8 +876,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
return 0;
case SECCOMP_RET_USER_NOTIF:
seccomp_do_user_notification(this_syscall, match, sd);
goto skip;
if (seccomp_do_user_notification(this_syscall, match, sd))
goto skip;
return 0;
case SECCOMP_RET_LOG:
seccomp_log(this_syscall, 0, action, true);
......@@ -1087,7 +1098,11 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
if (copy_from_user(&resp, buf, sizeof(resp)))
return -EFAULT;
if (resp.flags)
if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE)
return -EINVAL;
if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) &&
(resp.error || resp.val))
return -EINVAL;
ret = mutex_lock_interruptible(&filter->notify_lock);
......@@ -1116,6 +1131,7 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
knotif->state = SECCOMP_NOTIFY_REPLIED;
knotif->error = resp.error;
knotif->val = resp.val;
knotif->flags = resp.flags;
complete(&knotif->ready);
out:
mutex_unlock(&filter->notify_lock);
......
......@@ -35,6 +35,7 @@
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <limits.h>
#include <linux/elf.h>
#include <sys/uio.h>
#include <sys/utsname.h>
......@@ -43,6 +44,7 @@
#include <sys/times.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/kcmp.h>
#include <unistd.h>
#include <sys/syscall.h>
......@@ -206,6 +208,10 @@ struct seccomp_notif_sizes {
#define PTRACE_EVENTMSG_SYSCALL_EXIT 2
#endif
#ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
#endif
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
......@@ -3083,7 +3089,7 @@ static int user_trap_syscall(int nr, unsigned int flags)
return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
}
#define USER_NOTIF_MAGIC 116983961184613L
#define USER_NOTIF_MAGIC INT_MAX
TEST(user_notification_basic)
{
pid_t pid;
......@@ -3491,6 +3497,108 @@ TEST(seccomp_get_notif_sizes)
EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
}
static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
#ifdef __NR_kcmp
return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#else
errno = ENOSYS;
return -1;
#endif
}
TEST(user_notification_continue)
{
pid_t pid;
long ret;
int status, listener;
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
struct pollfd pollfd;
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
int dup_fd, pipe_fds[2];
pid_t self;
ret = pipe(pipe_fds);
if (ret < 0)
exit(1);
dup_fd = dup(pipe_fds[0]);
if (dup_fd < 0)
exit(1);
self = getpid();
ret = filecmp(self, self, pipe_fds[0], dup_fd);
if (ret)
exit(2);
exit(0);
}
pollfd.fd = listener;
pollfd.events = POLLIN | POLLOUT;
EXPECT_GT(poll(&pollfd, 1, -1), 0);
EXPECT_EQ(pollfd.revents, POLLIN);
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
pollfd.fd = listener;
pollfd.events = POLLIN | POLLOUT;
EXPECT_GT(poll(&pollfd, 1, -1), 0);
EXPECT_EQ(pollfd.revents, POLLOUT);
EXPECT_EQ(req.data.nr, __NR_dup);
resp.id = req.id;
resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
/*
* Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
* args be set to 0.
*/
resp.error = 0;
resp.val = USER_NOTIF_MAGIC;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
EXPECT_EQ(errno, EINVAL);
resp.error = USER_NOTIF_MAGIC;
resp.val = 0;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
EXPECT_EQ(errno, EINVAL);
resp.error = 0;
resp.val = 0;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
if (errno == EINVAL)
XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
}
skip:
EXPECT_EQ(waitpid(pid, &status, 0), pid);
EXPECT_EQ(true, WIFEXITED(status));
EXPECT_EQ(0, WEXITSTATUS(status)) {
if (WEXITSTATUS(status) == 2) {
XFAIL(return, "Kernel does not support kcmp() syscall");
return;
}
}
}
/*
* TODO:
* - add microbenchmarks
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment