Commit 9ecc6ea4 authored by Linus Torvalds

Merge tag 'seccomp-v5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp updates from Kees Cook:
 "There are a bunch of clean ups and selftest improvements along with
  two major updates to the SECCOMP_RET_USER_NOTIF filter return:
  EPOLLHUP support to more easily detect the death of a monitored
  process, and being able to inject fds when intercepting syscalls that
  expect an fd-opening side-effect (needed by both container folks and
  Chrome). The latter continued the refactoring of __scm_install_fd()
  started by Christoph, and in the process found and fixed a handful of
  bugs in various callers.

   - Improved selftest coverage, timeouts, and reporting

   - Add EPOLLHUP support for SECCOMP_RET_USER_NOTIF (Christian Brauner)

   - Refactor __scm_install_fd() into __receive_fd() and fix buggy
     callers

   - Introduce 'addfd' command for SECCOMP_RET_USER_NOTIF (Sargun
     Dhillon)"

* tag 'seccomp-v5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: (30 commits)
  selftests/seccomp: Test SECCOMP_IOCTL_NOTIF_ADDFD
  seccomp: Introduce addfd ioctl to seccomp user notifier
  fs: Expand __receive_fd() to accept existing fd
  pidfd: Replace open-coded receive_fd()
  fs: Add receive_fd() wrapper for __receive_fd()
  fs: Move __scm_install_fd() to __receive_fd()
  net/scm: Regularize compat handling of scm_detach_fds()
  pidfd: Add missing sock updates for pidfd_getfd()
  net/compat: Add missing sock updates for SCM_RIGHTS
  selftests/seccomp: Check ENOSYS under tracing
  selftests/seccomp: Refactor to use fixture variants
  selftests/harness: Clean up kern-doc for fixtures
  seccomp: Use -1 marker for end of mode 1 syscall list
  seccomp: Fix ioctl number for SECCOMP_IOCTL_NOTIF_ID_VALID
  selftests/seccomp: Rename user_trap_syscall() to user_notif_syscall()
  selftests/seccomp: Make kcmp() less required
  seccomp: Use pr_fmt
  selftests/seccomp: Improve calibration loop
  selftests/seccomp: use 90s as timeout
  selftests/seccomp: Expand benchmark to per-filter measurements
  ...
parents 99ea1521 c97aedc5
...@@ -9,12 +9,12 @@ static inline const int *get_compat_mode1_syscalls(void) ...@@ -9,12 +9,12 @@ static inline const int *get_compat_mode1_syscalls(void)
static const int syscalls_O32[] = { static const int syscalls_O32[] = {
__NR_O32_Linux + 3, __NR_O32_Linux + 4, __NR_O32_Linux + 3, __NR_O32_Linux + 4,
__NR_O32_Linux + 1, __NR_O32_Linux + 193, __NR_O32_Linux + 1, __NR_O32_Linux + 193,
0, /* null terminated */ -1, /* negative terminated */
}; };
static const int syscalls_N32[] = { static const int syscalls_N32[] = {
__NR_N32_Linux + 0, __NR_N32_Linux + 1, __NR_N32_Linux + 0, __NR_N32_Linux + 1,
__NR_N32_Linux + 58, __NR_N32_Linux + 211, __NR_N32_Linux + 58, __NR_N32_Linux + 211,
0, /* null terminated */ -1, /* negative terminated */
}; };
if (IS_ENABLED(CONFIG_MIPS32_O32) && test_thread_flag(TIF_32BIT_REGS)) if (IS_ENABLED(CONFIG_MIPS32_O32) && test_thread_flag(TIF_32BIT_REGS))
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <net/sock.h>
unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open __read_mostly = 1024*1024;
unsigned int sysctl_nr_open_min = BITS_PER_LONG; unsigned int sysctl_nr_open_min = BITS_PER_LONG;
...@@ -613,6 +614,10 @@ void __fd_install(struct files_struct *files, unsigned int fd, ...@@ -613,6 +614,10 @@ void __fd_install(struct files_struct *files, unsigned int fd,
rcu_read_unlock_sched(); rcu_read_unlock_sched();
} }
/*
* This consumes the "file" refcount, so callers should treat it
* as if they had called fput(file).
*/
void fd_install(unsigned int fd, struct file *file) void fd_install(unsigned int fd, struct file *file)
{ {
__fd_install(current->files, fd, file); __fd_install(current->files, fd, file);
...@@ -931,6 +936,62 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags) ...@@ -931,6 +936,62 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
return err; return err;
} }
/**
 * __receive_fd() - Install received file into file descriptor table
 *
 * @fd: fd to install into (if negative, a new fd will be allocated)
 * @file: struct file that was received from another process
 * @ufd: __user pointer to write new fd number to
 * @o_flags: the O_* flags to apply to the new fd entry
 *
 * Installs a received file into the file descriptor table, with appropriate
 * checks and count updates. Optionally writes the fd number to userspace, if
 * @ufd is non-NULL.
 *
 * This helper handles its own reference counting of the incoming
 * struct file.
 *
 * Returns newly installed fd or -ve on error.
 */
int __receive_fd(int fd, struct file *file, int __user *ufd, unsigned int o_flags)
{
	int new_fd;
	int error;

	error = security_file_receive(file);
	if (error)
		return error;

	if (fd < 0) {
		new_fd = get_unused_fd_flags(o_flags);
		if (new_fd < 0)
			return new_fd;
	} else {
		new_fd = fd;
	}

	if (ufd) {
		error = put_user(new_fd, ufd);
		if (error) {
			/* Only hand back an fd number that was reserved above. */
			if (fd < 0)
				put_unused_fd(new_fd);
			return error;
		}
	}

	if (fd < 0) {
		/* fd_install() consumes a reference, so take one for it. */
		fd_install(new_fd, get_file(file));
	} else {
		/*
		 * replace_fd() returns the (non-negative) fd number on
		 * success, so only a negative value is a failure. Testing
		 * for non-zero would return early for every fd other than 0
		 * and skip the sock accounting below.
		 */
		error = replace_fd(new_fd, file, o_flags);
		if (error < 0)
			return error;
	}

	/* Bump the sock usage counts, if any. */
	__receive_sock(file);
	return new_fd;
}
static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
{ {
int err = -EBADF; int err = -EBADF;
......
...@@ -341,6 +341,8 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) ...@@ -341,6 +341,8 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p)); seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p));
#ifdef CONFIG_SECCOMP #ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode); seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
seq_put_decimal_ull(m, "\nSeccomp_filters:\t",
atomic_read(&p->seccomp.filter_count));
#endif #endif
seq_puts(m, "\nSpeculation_Store_Bypass:\t"); seq_puts(m, "\nSpeculation_Store_Bypass:\t");
switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
......
...@@ -33,7 +33,7 @@ static inline const int *get_compat_mode1_syscalls(void) ...@@ -33,7 +33,7 @@ static inline const int *get_compat_mode1_syscalls(void)
static const int mode1_syscalls_32[] = { static const int mode1_syscalls_32[] = {
__NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_read_32, __NR_seccomp_write_32,
__NR_seccomp_exit_32, __NR_seccomp_sigreturn_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
0, /* null terminated */ -1, /* negative terminated */
}; };
return mode1_syscalls_32; return mode1_syscalls_32;
} }
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/posix_types.h> #include <linux/posix_types.h>
#include <linux/errno.h>
struct file; struct file;
...@@ -91,6 +92,24 @@ extern void put_unused_fd(unsigned int fd); ...@@ -91,6 +92,24 @@ extern void put_unused_fd(unsigned int fd);
extern void fd_install(unsigned int fd, struct file *file); extern void fd_install(unsigned int fd, struct file *file);
extern int __receive_fd(int fd, struct file *file, int __user *ufd,
unsigned int o_flags);
/*
 * Install @file into a newly allocated fd and report the fd number to
 * userspace through @ufd. A NULL @ufd is rejected with -EFAULT before
 * anything is installed.
 */
static inline int receive_fd_user(struct file *file, int __user *ufd,
				  unsigned int o_flags)
{
	return ufd ? __receive_fd(-1, file, ufd, o_flags) : -EFAULT;
}
/*
 * Install @file into a newly allocated fd (-1 asks __receive_fd() to pick
 * one) without writing the fd number to userspace. Returns whatever
 * __receive_fd() returns.
 */
static inline int receive_fd(struct file *file, unsigned int o_flags)
{
return __receive_fd(-1, file, NULL, o_flags);
}
/*
 * Install @file at the caller-chosen @fd without writing the fd number to
 * userspace. @fd is passed straight through, so a negative value still
 * makes __receive_fd() allocate a new fd. Returns whatever __receive_fd()
 * returns.
 */
static inline int receive_fd_replace(int fd, struct file *file, unsigned int o_flags)
{
return __receive_fd(fd, file, NULL, o_flags);
}
extern void flush_delayed_fput(void); extern void flush_delayed_fput(void);
extern void __fput_sync(struct file *); extern void __fput_sync(struct file *);
......
...@@ -10,9 +10,14 @@ ...@@ -10,9 +10,14 @@
SECCOMP_FILTER_FLAG_NEW_LISTENER | \ SECCOMP_FILTER_FLAG_NEW_LISTENER | \
SECCOMP_FILTER_FLAG_TSYNC_ESRCH) SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
/* sizeof() the first published struct seccomp_notif_addfd */
#define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24
#define SECCOMP_NOTIFY_ADDFD_SIZE_LATEST SECCOMP_NOTIFY_ADDFD_SIZE_VER0
#ifdef CONFIG_SECCOMP #ifdef CONFIG_SECCOMP
#include <linux/thread_info.h> #include <linux/thread_info.h>
#include <linux/atomic.h>
#include <asm/seccomp.h> #include <asm/seccomp.h>
struct seccomp_filter; struct seccomp_filter;
...@@ -29,6 +34,7 @@ struct seccomp_filter; ...@@ -29,6 +34,7 @@ struct seccomp_filter;
*/ */
struct seccomp { struct seccomp {
int mode; int mode;
atomic_t filter_count;
struct seccomp_filter *filter; struct seccomp_filter *filter;
}; };
...@@ -82,10 +88,10 @@ static inline int seccomp_mode(struct seccomp *s) ...@@ -82,10 +88,10 @@ static inline int seccomp_mode(struct seccomp *s)
#endif /* CONFIG_SECCOMP */ #endif /* CONFIG_SECCOMP */
#ifdef CONFIG_SECCOMP_FILTER #ifdef CONFIG_SECCOMP_FILTER
extern void put_seccomp_filter(struct task_struct *tsk); extern void seccomp_filter_release(struct task_struct *tsk);
extern void get_seccomp_filter(struct task_struct *tsk); extern void get_seccomp_filter(struct task_struct *tsk);
#else /* CONFIG_SECCOMP_FILTER */ #else /* CONFIG_SECCOMP_FILTER */
static inline void put_seccomp_filter(struct task_struct *tsk) static inline void seccomp_filter_release(struct task_struct *tsk)
{ {
return; return;
} }
......
...@@ -891,6 +891,8 @@ static inline int sk_memalloc_socks(void) ...@@ -891,6 +891,8 @@ static inline int sk_memalloc_socks(void)
{ {
return static_branch_unlikely(&memalloc_socks_key); return static_branch_unlikely(&memalloc_socks_key);
} }
void __receive_sock(struct file *file);
#else #else
static inline int sk_memalloc_socks(void) static inline int sk_memalloc_socks(void)
...@@ -898,6 +900,8 @@ static inline int sk_memalloc_socks(void) ...@@ -898,6 +900,8 @@ static inline int sk_memalloc_socks(void)
return 0; return 0;
} }
static inline void __receive_sock(struct file *file)
{ }
#endif #endif
static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask) static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)
......
...@@ -113,6 +113,25 @@ struct seccomp_notif_resp { ...@@ -113,6 +113,25 @@ struct seccomp_notif_resp {
__u32 flags; __u32 flags;
}; };
/* valid flags for seccomp_notif_addfd */
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
/**
 * struct seccomp_notif_addfd - argument of SECCOMP_IOCTL_NOTIF_ADDFD
 * @id: The ID of the seccomp notification
 * @flags: SECCOMP_ADDFD_FLAG_*
 * @srcfd: The local fd number
 * @newfd: Optional remote FD number if SETFD option is set, otherwise 0.
 * @newfd_flags: The O_* flags the remote FD should have applied
 */
struct seccomp_notif_addfd {
__u64 id;
__u32 flags;
__u32 srcfd;
__u32 newfd;
__u32 newfd_flags;
};
#define SECCOMP_IOC_MAGIC '!' #define SECCOMP_IOC_MAGIC '!'
#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr)
#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type)
...@@ -123,5 +142,9 @@ struct seccomp_notif_resp { ...@@ -123,5 +142,9 @@ struct seccomp_notif_resp {
#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
struct seccomp_notif_resp) struct seccomp_notif_resp)
#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
/* On success, the return value is the remote process's added fd number */
#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
struct seccomp_notif_addfd)
#endif /* _UAPI_LINUX_SECCOMP_H */ #endif /* _UAPI_LINUX_SECCOMP_H */
...@@ -204,6 +204,9 @@ struct task_struct init_task ...@@ -204,6 +204,9 @@ struct task_struct init_task
#ifdef CONFIG_SECURITY #ifdef CONFIG_SECURITY
.security = NULL, .security = NULL,
#endif #endif
#ifdef CONFIG_SECCOMP
.seccomp = { .filter_count = ATOMIC_INIT(0) },
#endif
}; };
EXPORT_SYMBOL(init_task); EXPORT_SYMBOL(init_task);
......
...@@ -217,6 +217,7 @@ void release_task(struct task_struct *p) ...@@ -217,6 +217,7 @@ void release_task(struct task_struct *p)
} }
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
seccomp_filter_release(p);
proc_flush_pid(thread_pid); proc_flush_pid(thread_pid);
put_pid(thread_pid); put_pid(thread_pid);
release_thread(p); release_thread(p);
......
...@@ -479,7 +479,6 @@ void free_task(struct task_struct *tsk) ...@@ -479,7 +479,6 @@ void free_task(struct task_struct *tsk)
#endif #endif
rt_mutex_debug_task_free(tsk); rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk); ftrace_graph_exit_task(tsk);
put_seccomp_filter(tsk);
arch_release_task_struct(tsk); arch_release_task_struct(tsk);
if (tsk->flags & PF_KTHREAD) if (tsk->flags & PF_KTHREAD)
free_kthread_struct(tsk); free_kthread_struct(tsk);
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <net/sock.h>
struct pid init_struct_pid = { struct pid init_struct_pid = {
.count = REFCOUNT_INIT(1), .count = REFCOUNT_INIT(1),
...@@ -635,17 +636,8 @@ static int pidfd_getfd(struct pid *pid, int fd) ...@@ -635,17 +636,8 @@ static int pidfd_getfd(struct pid *pid, int fd)
if (IS_ERR(file)) if (IS_ERR(file))
return PTR_ERR(file); return PTR_ERR(file);
ret = security_file_receive(file); ret = receive_fd(file, O_CLOEXEC);
if (ret) { fput(file);
fput(file);
return ret;
}
ret = get_unused_fd_flags(O_CLOEXEC);
if (ret < 0)
fput(file);
else
fd_install(ret, file);
return ret; return ret;
} }
......
This diff is collapsed.
...@@ -281,39 +281,31 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat ...@@ -281,39 +281,31 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
return 0; return 0;
} }
void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) static int scm_max_fds_compat(struct msghdr *msg)
{ {
struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control; if (msg->msg_controllen <= sizeof(struct compat_cmsghdr))
int fdmax = (kmsg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int); return 0;
int fdnum = scm->fp->count; return (msg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int);
struct file **fp = scm->fp->fp; }
int __user *cmfptr;
int err = 0, i;
if (fdnum < fdmax) void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
fdmax = fdnum; {
struct compat_cmsghdr __user *cm =
(struct compat_cmsghdr __user *)msg->msg_control;
unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
int fdmax = min_t(int, scm_max_fds_compat(msg), scm->fp->count);
int __user *cmsg_data = CMSG_USER_DATA(cm);
int err = 0, i;
for (i = 0, cmfptr = (int __user *) CMSG_COMPAT_DATA(cm); i < fdmax; i++, cmfptr++) { for (i = 0; i < fdmax; i++) {
int new_fd; err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
err = security_file_receive(fp[i]);
if (err)
break;
err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & kmsg->msg_flags
? O_CLOEXEC : 0);
if (err < 0) if (err < 0)
break; break;
new_fd = err;
err = put_user(new_fd, cmfptr);
if (err) {
put_unused_fd(new_fd);
break;
}
/* Bump the usage count and install the file. */
fd_install(new_fd, get_file(fp[i]));
} }
if (i > 0) { if (i > 0) {
int cmlen = CMSG_COMPAT_LEN(i * sizeof(int)); int cmlen = CMSG_COMPAT_LEN(i * sizeof(int));
err = put_user(SOL_SOCKET, &cm->cmsg_level); err = put_user(SOL_SOCKET, &cm->cmsg_level);
if (!err) if (!err)
err = put_user(SCM_RIGHTS, &cm->cmsg_type); err = put_user(SCM_RIGHTS, &cm->cmsg_type);
...@@ -321,16 +313,19 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) ...@@ -321,16 +313,19 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
err = put_user(cmlen, &cm->cmsg_len); err = put_user(cmlen, &cm->cmsg_len);
if (!err) { if (!err) {
cmlen = CMSG_COMPAT_SPACE(i * sizeof(int)); cmlen = CMSG_COMPAT_SPACE(i * sizeof(int));
kmsg->msg_control += cmlen; if (msg->msg_controllen < cmlen)
kmsg->msg_controllen -= cmlen; cmlen = msg->msg_controllen;
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
} }
} }
if (i < fdnum)
kmsg->msg_flags |= MSG_CTRUNC; if (i < scm->fp->count || (scm->fp->count && fdmax <= 0))
msg->msg_flags |= MSG_CTRUNC;
/* /*
* All of the files that fit in the message have had their * All of the files that fit in the message have had their usage counts
* usage counts incremented, so we just free the list. * incremented, so we just free the list.
*/ */
__scm_destroy(scm); __scm_destroy(scm);
} }
......
...@@ -280,36 +280,6 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter ...@@ -280,36 +280,6 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter
} }
EXPORT_SYMBOL(put_cmsg_scm_timestamping); EXPORT_SYMBOL(put_cmsg_scm_timestamping);
/*
 * Install @file into a freshly allocated fd and copy the fd number out to
 * @ufd. Returns 0 on success, otherwise the value reported by the step
 * that failed; a reserved fd is released again if the copy-out fails.
 */
static int __scm_install_fd(struct file *file, int __user *ufd, int o_flags)
{
	struct socket *sock;
	int fd, err;

	err = security_file_receive(file);
	if (err)
		return err;

	fd = get_unused_fd_flags(o_flags);
	if (fd < 0)
		return fd;

	err = put_user(fd, ufd);
	if (err) {
		put_unused_fd(fd);
		return err;
	}

	/* Only sockets carry cgroup data that needs refreshing. */
	sock = sock_from_file(file, &err);
	if (sock) {
		sock_update_netprioidx(&sock->sk->sk_cgrp_data);
		sock_update_classid(&sock->sk->sk_cgrp_data);
	}

	/* Bump the usage count and install the file. */
	fd_install(fd, get_file(file));
	return 0;
}
static int scm_max_fds(struct msghdr *msg) static int scm_max_fds(struct msghdr *msg)
{ {
if (msg->msg_controllen <= sizeof(struct cmsghdr)) if (msg->msg_controllen <= sizeof(struct cmsghdr))
...@@ -319,29 +289,29 @@ static int scm_max_fds(struct msghdr *msg) ...@@ -319,29 +289,29 @@ static int scm_max_fds(struct msghdr *msg)
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{ {
struct cmsghdr __user *cm struct cmsghdr __user *cm =
= (__force struct cmsghdr __user*)msg->msg_control; (__force struct cmsghdr __user *)msg->msg_control;
int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; unsigned int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count); int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count);
int __user *cmsg_data = CMSG_USER_DATA(cm); int __user *cmsg_data = CMSG_USER_DATA(cm);
int err = 0, i; int err = 0, i;
/* no use for FD passing from kernel space callers */
if (WARN_ON_ONCE(!msg->msg_control_is_user))
return;
if (msg->msg_flags & MSG_CMSG_COMPAT) { if (msg->msg_flags & MSG_CMSG_COMPAT) {
scm_detach_fds_compat(msg, scm); scm_detach_fds_compat(msg, scm);
return; return;
} }
/* no use for FD passing from kernel space callers */
if (WARN_ON_ONCE(!msg->msg_control_is_user))
return;
for (i = 0; i < fdmax; i++) { for (i = 0; i < fdmax; i++) {
err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags); err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
if (err) if (err < 0)
break; break;
} }
if (i > 0) { if (i > 0) {
int cmlen = CMSG_LEN(i * sizeof(int)); int cmlen = CMSG_LEN(i * sizeof(int));
err = put_user(SOL_SOCKET, &cm->cmsg_level); err = put_user(SOL_SOCKET, &cm->cmsg_level);
......
...@@ -2842,6 +2842,27 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct * ...@@ -2842,6 +2842,27 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *
} }
EXPORT_SYMBOL(sock_no_mmap); EXPORT_SYMBOL(sock_no_mmap);
/*
 * Called when a file has been received (via SCM_RIGHTS, etc): if the file
 * is a socket, bump the various sock-based usage counts. Non-socket files
 * are left untouched.
 */
void __receive_sock(struct file *file)
{
	/*
	 * sock_from_file() wants somewhere to store an error code, but a
	 * NULL return already tells us the file is not a socket, so the
	 * stored value is never looked at.
	 */
	int unused_err;
	struct socket *sock = sock_from_file(file, &unused_err);

	if (!sock)
		return;

	sock_update_netprioidx(&sock->sk->sk_cgrp_data);
	sock_update_classid(&sock->sk->sk_cgrp_data);
}
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{ {
ssize_t res; ssize_t res;
......
...@@ -195,8 +195,9 @@ ...@@ -195,8 +195,9 @@
* *
* .. code-block:: c * .. code-block:: c
* *
* FIXTURE_DATA(datatype name) * FIXTURE_DATA(datatype_name)
* *
* Almost always, you want just FIXTURE() instead (see below).
* This call may be used when the type of the fixture data * This call may be used when the type of the fixture data
* is needed. In general, this should not be needed unless * is needed. In general, this should not be needed unless
* the *self* is being passed to a helper directly. * the *self* is being passed to a helper directly.
...@@ -211,7 +212,7 @@ ...@@ -211,7 +212,7 @@
* *
* .. code-block:: c * .. code-block:: c
* *
* FIXTURE(datatype name) { * FIXTURE(fixture_name) {
* type property1; * type property1;
* ... * ...
* }; * };
...@@ -238,7 +239,7 @@ ...@@ -238,7 +239,7 @@
* *
* .. code-block:: c * .. code-block:: c
* *
* FIXTURE_SETUP(fixture name) { implementation } * FIXTURE_SETUP(fixture_name) { implementation }
* *
* Populates the required "setup" function for a fixture. An instance of the * Populates the required "setup" function for a fixture. An instance of the
* datatype defined with FIXTURE_DATA() will be exposed as *self* for the * datatype defined with FIXTURE_DATA() will be exposed as *self* for the
...@@ -264,7 +265,7 @@ ...@@ -264,7 +265,7 @@
* *
* .. code-block:: c * .. code-block:: c
* *
* FIXTURE_TEARDOWN(fixture name) { implementation } * FIXTURE_TEARDOWN(fixture_name) { implementation }
* *
* Populates the required "teardown" function for a fixture. An instance of the * Populates the required "teardown" function for a fixture. An instance of the
* datatype defined with FIXTURE_DATA() will be exposed as *self* for the * datatype defined with FIXTURE_DATA() will be exposed as *self* for the
...@@ -285,7 +286,7 @@ ...@@ -285,7 +286,7 @@
* *
* .. code-block:: c * .. code-block:: c
* *
* FIXTURE_VARIANT(datatype name) { * FIXTURE_VARIANT(fixture_name) {
* type property1; * type property1;
* ... * ...
* }; * };
...@@ -305,8 +306,8 @@ ...@@ -305,8 +306,8 @@
* *
* .. code-block:: c * .. code-block:: c
* *
* FIXTURE_ADD(datatype name) { * FIXTURE_VARIANT_ADD(fixture_name, variant_name) {
* .property1 = val1; * .property1 = val1,
* ... * ...
* }; * };
* *
......
CONFIG_SECCOMP=y CONFIG_SECCOMP=y
CONFIG_SECCOMP_FILTER=y CONFIG_SECCOMP_FILTER=y
CONFIG_USER_NS=y
...@@ -18,9 +18,9 @@ ...@@ -18,9 +18,9 @@
unsigned long long timing(clockid_t clk_id, unsigned long long samples) unsigned long long timing(clockid_t clk_id, unsigned long long samples)
{ {
pid_t pid, ret;
unsigned long long i;
struct timespec start, finish; struct timespec start, finish;
unsigned long long i;
pid_t pid, ret;
pid = getpid(); pid = getpid();
assert(clock_gettime(clk_id, &start) == 0); assert(clock_gettime(clk_id, &start) == 0);
...@@ -31,30 +31,43 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples) ...@@ -31,30 +31,43 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples)
assert(clock_gettime(clk_id, &finish) == 0); assert(clock_gettime(clk_id, &finish) == 0);
i = finish.tv_sec - start.tv_sec; i = finish.tv_sec - start.tv_sec;
i *= 1000000000; i *= 1000000000ULL;
i += finish.tv_nsec - start.tv_nsec; i += finish.tv_nsec - start.tv_nsec;
printf("%lu.%09lu - %lu.%09lu = %llu\n", printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n",
finish.tv_sec, finish.tv_nsec, finish.tv_sec, finish.tv_nsec,
start.tv_sec, start.tv_nsec, start.tv_sec, start.tv_nsec,
i); i, (double)i / 1000000000.0);
return i; return i;
} }
unsigned long long calibrate(void) unsigned long long calibrate(void)
{ {
unsigned long long i; struct timespec start, finish;
unsigned long long i, samples, step = 9973;
printf("Calibrating reasonable sample size...\n"); pid_t pid, ret;
int seconds = 15;
for (i = 5; ; i++) { printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds);
unsigned long long samples = 1 << i;
/* Find something that takes more than 5 seconds to run. */ samples = 0;
if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5) pid = getpid();
return samples; assert(clock_gettime(CLOCK_MONOTONIC, &start) == 0);
} do {
for (i = 0; i < step; i++) {
ret = syscall(__NR_getpid);
assert(pid == ret);
}
assert(clock_gettime(CLOCK_MONOTONIC, &finish) == 0);
samples += step;
i = finish.tv_sec - start.tv_sec;
i *= 1000000000ULL;
i += finish.tv_nsec - start.tv_nsec;
} while (i < 1000000000ULL);
return samples * seconds;
} }
int main(int argc, char *argv[]) int main(int argc, char *argv[])
...@@ -68,32 +81,55 @@ int main(int argc, char *argv[]) ...@@ -68,32 +81,55 @@ int main(int argc, char *argv[])
}; };
long ret; long ret;
unsigned long long samples; unsigned long long samples;
unsigned long long native, filtered; unsigned long long native, filter1, filter2;
printf("Current BPF sysctl settings:\n");
system("sysctl net.core.bpf_jit_enable");
system("sysctl net.core.bpf_jit_harden");
if (argc > 1) if (argc > 1)
samples = strtoull(argv[1], NULL, 0); samples = strtoull(argv[1], NULL, 0);
else else
samples = calibrate(); samples = calibrate();
printf("Benchmarking %llu samples...\n", samples); printf("Benchmarking %llu syscalls...\n", samples);
/* Native call */
native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
printf("getpid native: %llu ns\n", native); printf("getpid native: %llu ns\n", native);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
assert(ret == 0); assert(ret == 0);
/* One filter */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
assert(ret == 0); assert(ret == 0);
filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
printf("getpid RET_ALLOW: %llu ns\n", filtered); printf("getpid RET_ALLOW 1 filter: %llu ns\n", filter1);
if (filter1 == native)
printf("No overhead measured!? Try running again with more samples.\n");
/* Two filters */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
assert(ret == 0);
filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
printf("getpid RET_ALLOW 2 filters: %llu ns\n", filter2);
/* Calculations */
printf("Estimated total seccomp overhead for 1 filter: %llu ns\n",
filter1 - native);
printf("Estimated total seccomp overhead for 2 filters: %llu ns\n",
filter2 - native);
printf("Estimated seccomp overhead per syscall: %llu ns\n", printf("Estimated seccomp per-filter overhead: %llu ns\n",
filtered - native); filter2 - filter1);
if (filtered == native) printf("Estimated seccomp entry overhead: %llu ns\n",
printf("Trying running again with more samples.\n"); filter1 - native - (filter2 - filter1));
return 0; return 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment