Commit fd33c436 authored by James Morris

Merge tag 'seccomp-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux into next

parents 2ccf4661 c2e1f2e3
...@@ -7953,6 +7953,16 @@ S: Maintained ...@@ -7953,6 +7953,16 @@ S: Maintained
F: drivers/mmc/host/sdhci.* F: drivers/mmc/host/sdhci.*
F: drivers/mmc/host/sdhci-pltfm.[ch] F: drivers/mmc/host/sdhci-pltfm.[ch]
SECURE COMPUTING
M: Kees Cook <keescook@chromium.org>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git seccomp
S: Supported
F: kernel/seccomp.c
F: include/uapi/linux/seccomp.h
F: include/linux/seccomp.h
K: \bsecure_computing
K: \bTIF_SECCOMP\b
SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF) SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
M: Anton Vorontsov <anton@enomsg.org> M: Anton Vorontsov <anton@enomsg.org>
L: linuxppc-dev@lists.ozlabs.org L: linuxppc-dev@lists.ozlabs.org
......
...@@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER ...@@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER
- secure_computing is called from a ptrace_event()-safe context - secure_computing is called from a ptrace_event()-safe context
- secure_computing return value is checked and a return value of -1 - secure_computing return value is checked and a return value of -1
results in the system call being skipped immediately. results in the system call being skipped immediately.
- seccomp syscall wired up
config SECCOMP_FILTER config SECCOMP_FILTER
def_bool y def_bool y
......
...@@ -409,6 +409,7 @@ ...@@ -409,6 +409,7 @@
#define __NR_sched_setattr (__NR_SYSCALL_BASE+380) #define __NR_sched_setattr (__NR_SYSCALL_BASE+380)
#define __NR_sched_getattr (__NR_SYSCALL_BASE+381) #define __NR_sched_getattr (__NR_SYSCALL_BASE+381)
#define __NR_renameat2 (__NR_SYSCALL_BASE+382) #define __NR_renameat2 (__NR_SYSCALL_BASE+382)
#define __NR_seccomp (__NR_SYSCALL_BASE+383)
/* /*
* This may need to be greater than __NR_last_syscall+1 in order to * This may need to be greater than __NR_last_syscall+1 in order to
......
...@@ -392,6 +392,7 @@ ...@@ -392,6 +392,7 @@
/* 380 */ CALL(sys_sched_setattr) /* 380 */ CALL(sys_sched_setattr)
CALL(sys_sched_getattr) CALL(sys_sched_getattr)
CALL(sys_renameat2) CALL(sys_renameat2)
CALL(sys_seccomp)
#ifndef syscalls_counted #ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted #define syscalls_counted
......
...@@ -372,16 +372,17 @@ ...@@ -372,16 +372,17 @@
#define __NR_sched_setattr (__NR_Linux + 349) #define __NR_sched_setattr (__NR_Linux + 349)
#define __NR_sched_getattr (__NR_Linux + 350) #define __NR_sched_getattr (__NR_Linux + 350)
#define __NR_renameat2 (__NR_Linux + 351) #define __NR_renameat2 (__NR_Linux + 351)
#define __NR_seccomp (__NR_Linux + 352)
/* /*
* Offset of the last Linux o32 flavoured syscall * Offset of the last Linux o32 flavoured syscall
*/ */
#define __NR_Linux_syscalls 351 #define __NR_Linux_syscalls 352
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
#define __NR_O32_Linux 4000 #define __NR_O32_Linux 4000
#define __NR_O32_Linux_syscalls 351 #define __NR_O32_Linux_syscalls 352
#if _MIPS_SIM == _MIPS_SIM_ABI64 #if _MIPS_SIM == _MIPS_SIM_ABI64
...@@ -701,16 +702,17 @@ ...@@ -701,16 +702,17 @@
#define __NR_sched_setattr (__NR_Linux + 309) #define __NR_sched_setattr (__NR_Linux + 309)
#define __NR_sched_getattr (__NR_Linux + 310) #define __NR_sched_getattr (__NR_Linux + 310)
#define __NR_renameat2 (__NR_Linux + 311) #define __NR_renameat2 (__NR_Linux + 311)
#define __NR_seccomp (__NR_Linux + 312)
/* /*
* Offset of the last Linux 64-bit flavoured syscall * Offset of the last Linux 64-bit flavoured syscall
*/ */
#define __NR_Linux_syscalls 311 #define __NR_Linux_syscalls 312
#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
#define __NR_64_Linux 5000 #define __NR_64_Linux 5000
#define __NR_64_Linux_syscalls 311 #define __NR_64_Linux_syscalls 312
#if _MIPS_SIM == _MIPS_SIM_NABI32 #if _MIPS_SIM == _MIPS_SIM_NABI32
...@@ -1034,15 +1036,16 @@ ...@@ -1034,15 +1036,16 @@
#define __NR_sched_setattr (__NR_Linux + 313) #define __NR_sched_setattr (__NR_Linux + 313)
#define __NR_sched_getattr (__NR_Linux + 314) #define __NR_sched_getattr (__NR_Linux + 314)
#define __NR_renameat2 (__NR_Linux + 315) #define __NR_renameat2 (__NR_Linux + 315)
#define __NR_seccomp (__NR_Linux + 316)
/* /*
* Offset of the last N32 flavoured syscall * Offset of the last N32 flavoured syscall
*/ */
#define __NR_Linux_syscalls 315 #define __NR_Linux_syscalls 316
#endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
#define __NR_N32_Linux 6000 #define __NR_N32_Linux 6000
#define __NR_N32_Linux_syscalls 315 #define __NR_N32_Linux_syscalls 316
#endif /* _UAPI_ASM_UNISTD_H */ #endif /* _UAPI_ASM_UNISTD_H */
...@@ -578,3 +578,4 @@ EXPORT(sys_call_table) ...@@ -578,3 +578,4 @@ EXPORT(sys_call_table)
PTR sys_sched_setattr PTR sys_sched_setattr
PTR sys_sched_getattr /* 4350 */ PTR sys_sched_getattr /* 4350 */
PTR sys_renameat2 PTR sys_renameat2
PTR sys_seccomp
...@@ -431,4 +431,5 @@ EXPORT(sys_call_table) ...@@ -431,4 +431,5 @@ EXPORT(sys_call_table)
PTR sys_sched_setattr PTR sys_sched_setattr
PTR sys_sched_getattr /* 5310 */ PTR sys_sched_getattr /* 5310 */
PTR sys_renameat2 PTR sys_renameat2
PTR sys_seccomp
.size sys_call_table,.-sys_call_table .size sys_call_table,.-sys_call_table
...@@ -424,4 +424,5 @@ EXPORT(sysn32_call_table) ...@@ -424,4 +424,5 @@ EXPORT(sysn32_call_table)
PTR sys_sched_setattr PTR sys_sched_setattr
PTR sys_sched_getattr PTR sys_sched_getattr
PTR sys_renameat2 /* 6315 */ PTR sys_renameat2 /* 6315 */
PTR sys_seccomp
.size sysn32_call_table,.-sysn32_call_table .size sysn32_call_table,.-sysn32_call_table
...@@ -557,4 +557,5 @@ EXPORT(sys32_call_table) ...@@ -557,4 +557,5 @@ EXPORT(sys32_call_table)
PTR sys_sched_setattr PTR sys_sched_setattr
PTR sys_sched_getattr /* 4350 */ PTR sys_sched_getattr /* 4350 */
PTR sys_renameat2 PTR sys_renameat2
PTR sys_seccomp
.size sys32_call_table,.-sys32_call_table .size sys32_call_table,.-sys32_call_table
...@@ -360,3 +360,4 @@ ...@@ -360,3 +360,4 @@
351 i386 sched_setattr sys_sched_setattr 351 i386 sched_setattr sys_sched_setattr
352 i386 sched_getattr sys_sched_getattr 352 i386 sched_getattr sys_sched_getattr
353 i386 renameat2 sys_renameat2 353 i386 renameat2 sys_renameat2
354 i386 seccomp sys_seccomp
...@@ -323,6 +323,7 @@ ...@@ -323,6 +323,7 @@
314 common sched_setattr sys_sched_setattr 314 common sched_setattr sys_sched_setattr
315 common sched_getattr sys_sched_getattr 315 common sched_getattr sys_sched_getattr
316 common renameat2 sys_renameat2 316 common renameat2 sys_renameat2
317 common seccomp sys_seccomp
# #
# x32-specific system call numbers start at 512 to avoid cache impact # x32-specific system call numbers start at 512 to avoid cache impact
......
...@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds); ...@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds);
/* /*
* determine how safe it is to execute the proposed program * determine how safe it is to execute the proposed program
* - the caller must hold ->cred_guard_mutex to protect against * - the caller must hold ->cred_guard_mutex to protect against
* PTRACE_ATTACH * PTRACE_ATTACH or seccomp thread-sync
*/ */
static void check_unsafe_exec(struct linux_binprm *bprm) static void check_unsafe_exec(struct linux_binprm *bprm)
{ {
...@@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) ...@@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
* This isn't strictly necessary, but it makes it harder for LSMs to * This isn't strictly necessary, but it makes it harder for LSMs to
* mess up. * mess up.
*/ */
if (current->no_new_privs) if (task_no_new_privs(current))
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
t = p; t = p;
...@@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm) ...@@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm)
bprm->cred->egid = current_egid(); bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
!current->no_new_privs && !task_no_new_privs(current) &&
kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */ /* Set-uid? */
......
...@@ -1307,13 +1307,12 @@ struct task_struct { ...@@ -1307,13 +1307,12 @@ struct task_struct {
* execve */ * execve */
unsigned in_iowait:1; unsigned in_iowait:1;
/* task may not gain privileges */
unsigned no_new_privs:1;
/* Revert to default priority/policy when forking */ /* Revert to default priority/policy when forking */
unsigned sched_reset_on_fork:1; unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1; unsigned sched_contributes_to_load:1;
unsigned long atomic_flags; /* Flags needing atomic access. */
pid_t pid; pid_t pid;
pid_t tgid; pid_t tgid;
...@@ -1967,6 +1966,19 @@ static inline void memalloc_noio_restore(unsigned int flags) ...@@ -1967,6 +1966,19 @@ static inline void memalloc_noio_restore(unsigned int flags)
current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
} }
/*
 * Per-process atomic flags.
 *
 * Each PFA_* value is a bit NUMBER inside task_struct::atomic_flags, because
 * that is what test_bit()/set_bit() take as their first argument. It is not
 * a bit mask: writing flags as 0x1, 0x2, 0x4, ... would select bits 1, 2,
 * 4, ... and eventually index past the end of the word.
 */
#define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */

/* Return true if the no_new_privs flag is set on task @p. */
static inline bool task_no_new_privs(struct task_struct *p)
{
	return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
}

/* Atomically set the no_new_privs flag on task @p. */
static inline void task_set_no_new_privs(struct task_struct *p)
{
	set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
}
/* /*
* task->jobctl flags * task->jobctl flags
*/ */
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
#include <uapi/linux/seccomp.h> #include <uapi/linux/seccomp.h>
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC)
#ifdef CONFIG_SECCOMP #ifdef CONFIG_SECCOMP
#include <linux/thread_info.h> #include <linux/thread_info.h>
...@@ -14,11 +16,11 @@ struct seccomp_filter; ...@@ -14,11 +16,11 @@ struct seccomp_filter;
* *
* @mode: indicates one of the valid values above for controlled * @mode: indicates one of the valid values above for controlled
* system calls available to a process. * system calls available to a process.
* @filter: The metadata and ruleset for determining what system calls * @filter: must always point to a valid seccomp-filter or NULL as it is
* are allowed for a task. * accessed without locking during system call entry.
* *
* @filter must only be accessed from the context of current as there * @filter must only be accessed from the context of current as there
* is no locking. * is no read locking.
*/ */
struct seccomp { struct seccomp {
int mode; int mode;
......
...@@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, ...@@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
unsigned long idx1, unsigned long idx2); unsigned long idx1, unsigned long idx2);
asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
const char __user *uargs);
#endif #endif
...@@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) ...@@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr)
__SYSCALL(__NR_sched_getattr, sys_sched_getattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr)
#define __NR_renameat2 276 #define __NR_renameat2 276
__SYSCALL(__NR_renameat2, sys_renameat2) __SYSCALL(__NR_renameat2, sys_renameat2)
#define __NR_seccomp 277
__SYSCALL(__NR_seccomp, sys_seccomp)
#undef __NR_syscalls #undef __NR_syscalls
#define __NR_syscalls 277 #define __NR_syscalls 278
/* /*
* All syscalls below here should go away really, * All syscalls below here should go away really,
......
...@@ -10,6 +10,13 @@ ...@@ -10,6 +10,13 @@
#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */
#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
/* Valid operations for seccomp syscall. */
#define SECCOMP_SET_MODE_STRICT 0
#define SECCOMP_SET_MODE_FILTER 1
/* Valid flags for SECCOMP_SET_MODE_FILTER */
#define SECCOMP_FILTER_FLAG_TSYNC 1
/* /*
* All BPF programs must return a 32-bit value. * All BPF programs must return a 32-bit value.
* The bottom 16-bits are for optional return data. * The bottom 16-bits are for optional return data.
......
...@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) ...@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
goto free_ti; goto free_ti;
tsk->stack = ti; tsk->stack = ti;
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
* the sighand lock in case orig has changed between now and
* then. Until then, filter must be NULL to avoid messing up
* the usage counts on the error path calling free_task.
*/
tsk->seccomp.filter = NULL;
#endif
setup_thread_stack(tsk, orig); setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk); clear_user_return_notifier(tsk);
...@@ -1081,6 +1090,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) ...@@ -1081,6 +1090,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
return 0; return 0;
} }
/*
 * copy_seccomp - hand the forking task's seccomp state to a new task.
 * @p: the task being created; it is not running yet.
 *
 * Takes a reference on current's filter, copies current->seccomp into @p,
 * and then re-synchronises the no_new_privs flag and the TIF_SECCOMP
 * thread flag in case either changed on current after the task_struct was
 * duplicated. Compiles to an empty function without CONFIG_SECCOMP.
 */
static void copy_seccomp(struct task_struct *p)
{
#ifdef CONFIG_SECCOMP
	/*
	 * Must be called with sighand->lock held, which is common to
	 * all threads in the group. Holding cred_guard_mutex is not
	 * needed because this new task is not yet running and cannot
	 * be racing exec.
	 *
	 * assert_spin_locked() is used instead of
	 * BUG_ON(!spin_is_locked()) because spin_is_locked() always
	 * reports "not locked" on uniprocessor builds without spinlock
	 * debugging, which would make the BUG_ON fire on every fork.
	 */
	assert_spin_locked(&current->sighand->siglock);

	/* Ref-count the new filter user, and assign it. */
	get_seccomp_filter(current);
	p->seccomp = current->seccomp;

	/*
	 * Explicitly enable no_new_privs here in case it got set
	 * between the task_struct being duplicated and holding the
	 * sighand lock. The seccomp state and nnp must be in sync.
	 */
	if (task_no_new_privs(current))
		task_set_no_new_privs(p);

	/*
	 * If the parent gained a seccomp mode after the thread flags
	 * were copied but before we took the sighand lock, we have to
	 * manually enable the seccomp thread flag here.
	 */
	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
		set_tsk_thread_flag(p, TIF_SECCOMP);
#endif
}
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
{ {
current->clear_child_tid = tidptr; current->clear_child_tid = tidptr;
...@@ -1196,7 +1238,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, ...@@ -1196,7 +1238,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto fork_out; goto fork_out;
ftrace_graph_init_task(p); ftrace_graph_init_task(p);
get_seccomp_filter(p);
rt_mutex_init_task(p); rt_mutex_init_task(p);
...@@ -1436,6 +1477,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, ...@@ -1436,6 +1477,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
spin_lock(&current->sighand->siglock); spin_lock(&current->sighand->siglock);
/*
* Copy seccomp details explicitly here, in case they were changed
* before holding sighand lock.
*/
copy_seccomp(p);
/* /*
* Process group and session signals need to be delivered to just the * Process group and session signals need to be delivered to just the
* parent before the fork or both the parent and the child after the * parent before the fork or both the parent and the child after the
......
This diff is collapsed.
...@@ -1990,12 +1990,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, ...@@ -1990,12 +1990,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (arg2 != 1 || arg3 || arg4 || arg5) if (arg2 != 1 || arg3 || arg4 || arg5)
return -EINVAL; return -EINVAL;
current->no_new_privs = 1; task_set_no_new_privs(current);
break; break;
case PR_GET_NO_NEW_PRIVS: case PR_GET_NO_NEW_PRIVS:
if (arg2 || arg3 || arg4 || arg5) if (arg2 || arg3 || arg4 || arg5)
return -EINVAL; return -EINVAL;
return current->no_new_privs ? 1 : 0; return task_no_new_privs(current) ? 1 : 0;
case PR_GET_THP_DISABLE: case PR_GET_THP_DISABLE:
if (arg2 || arg3 || arg4 || arg5) if (arg2 || arg3 || arg4 || arg5)
return -EINVAL; return -EINVAL;
......
...@@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at); ...@@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at);
/* compare kernel pointers */ /* compare kernel pointers */
cond_syscall(sys_kcmp); cond_syscall(sys_kcmp);
/* operate on Secure Computing state */
cond_syscall(sys_seccomp);
...@@ -621,7 +621,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) ...@@ -621,7 +621,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
* There is no exception for unconfined as change_hat is not * There is no exception for unconfined as change_hat is not
* available. * available.
*/ */
if (current->no_new_privs) if (task_no_new_privs(current))
return -EPERM; return -EPERM;
/* released below */ /* released below */
...@@ -776,7 +776,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec, ...@@ -776,7 +776,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
* no_new_privs is set because this always results in a reduction * no_new_privs is set because this always results in a reduction
* of permissions. * of permissions.
*/ */
if (current->no_new_privs && !unconfined(profile)) { if (task_no_new_privs(current) && !unconfined(profile)) {
put_cred(cred); put_cred(cred);
return -EPERM; return -EPERM;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment