Commit 26b84401 authored by Linus Torvalds

Merge tag 'lsm-pr-20221003' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm

Pull LSM updates from Paul Moore:
 "Seven patches for the LSM layer and we've got a mix of trivial and
  significant patches. Highlights below, starting with the smaller bits
  first so they don't get lost in the discussion of the larger items:

   - Remove some redundant NULL pointer checks in the common LSM audit
     code.

   - Ratelimit the lockdown LSM's access denial messages.

     With this change there is a chance that the last visible lockdown
     message on the console is outdated/old, but it does help preserve
     the initial series of lockdown denials that started the denial
     message flood and my gut feeling is that these might be the more
     valuable messages.

   - Open userfaultfds as readonly instead of read/write.

     While this code obviously lives outside the LSM, it does have a
     noticeable impact on the LSMs with Ondrej explaining the situation
     in the commit description. It is worth noting that this patch
     languished on the VFS list for over a year without any comments
     (objections or otherwise) so I took the liberty of pulling it into
     the LSM tree after giving fair notice. It has been in linux-next
     since the end of August without any noticeable problems.

   - Add a LSM hook for user namespace creation, with implementations
     for both the BPF LSM and SELinux.

     Even though the changes are fairly small, this is the bulk of the
     diffstat as we are also including BPF LSM selftests for the new
     hook.

     It's also the most contentious of the changes in this pull request
     with Eric Biederman NACK'ing the LSM hook multiple times during its
     development and discussion upstream. While I've never taken NACK's
     lightly, I'm sending these patches to you because it is my belief
     that they are of good quality, satisfy a long-standing need of
     users and distros, and are in keeping with the existing nature of
     the LSM layer and the Linux Kernel as a whole.

     The patches implement an LSM hook for user namespace creation
     that allows for a granular approach, configurable at runtime, which
     enables both monitoring and control of user namespaces. The general
     consensus has been that this is far preferable to the other
     solutions that have been adopted downstream including outright
     removal from the kernel, disabling via system wide sysctls, or
     various other out-of-tree mechanisms that users have been forced to
     adopt since we haven't been able to provide them an upstream
     solution for their requests. Eric has been steadfast in his
     objections to this LSM hook, explaining that any restrictions on
     the user namespace could have significant impact on userspace.
     While there is the possibility of impacting userspace, it is
     important to note that this solution only impacts userspace when it
     is requested based on the runtime configuration supplied by the
     distro/admin/user. Frederick (the patchset author), the LSM/security
     community, and myself have tried to work with Eric during
     development of this patchset to find a mutually acceptable
     solution, but Eric's approach and unwillingness to engage in a
     meaningful way have made this impossible. I have CC'd Eric directly
     on this pull request so he has a chance to provide his side of the
     story; there have been no objections outside of Eric's"

* tag 'lsm-pr-20221003' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm:
  lockdown: ratelimit denial messages
  userfaultfd: open userfaultfds with O_RDONLY
  selinux: Implement userns_create hook
  selftests/bpf: Add tests verifying bpf lsm userns_create hook
  bpf-lsm: Make bpf_lsm_userns_create() sleepable
  security, lsm: Introduce security_create_user_ns()
  lsm: clean up redundant NULL pointer check
parents e816da29 1e7d8bcb
...@@ -991,7 +991,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new, ...@@ -991,7 +991,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new,
int fd; int fd;
fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new, fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new,
O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode); O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
if (fd < 0) if (fd < 0)
return fd; return fd;
...@@ -2094,7 +2094,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) ...@@ -2094,7 +2094,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
mmgrab(ctx->mm); mmgrab(ctx->mm);
fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx, fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx,
O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL); O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
if (fd < 0) { if (fd < 0) {
mmdrop(ctx->mm); mmdrop(ctx->mm);
kmem_cache_free(userfaultfd_ctx_cachep, ctx); kmem_cache_free(userfaultfd_ctx_cachep, ctx);
......
...@@ -224,6 +224,7 @@ LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2, ...@@ -224,6 +224,7 @@ LSM_HOOK(int, -ENOSYS, task_prctl, int option, unsigned long arg2,
unsigned long arg3, unsigned long arg4, unsigned long arg5) unsigned long arg3, unsigned long arg4, unsigned long arg5)
LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p,
struct inode *inode) struct inode *inode)
LSM_HOOK(int, 0, userns_create, const struct cred *cred)
LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag)
LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp, LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp,
u32 *secid) u32 *secid)
......
...@@ -806,6 +806,10 @@ ...@@ -806,6 +806,10 @@
* security attributes, e.g. for /proc/pid inodes. * security attributes, e.g. for /proc/pid inodes.
* @p contains the task_struct for the task. * @p contains the task_struct for the task.
* @inode contains the inode structure for the inode. * @inode contains the inode structure for the inode.
* @userns_create:
* Check permission prior to creating a new user namespace.
* @cred points to prepared creds.
* Return 0 if successful, otherwise < 0 error code.
* *
* Security hooks for Netlink messaging. * Security hooks for Netlink messaging.
* *
......
...@@ -437,6 +437,7 @@ int security_task_kill(struct task_struct *p, struct kernel_siginfo *info, ...@@ -437,6 +437,7 @@ int security_task_kill(struct task_struct *p, struct kernel_siginfo *info,
int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5); unsigned long arg4, unsigned long arg5);
void security_task_to_inode(struct task_struct *p, struct inode *inode); void security_task_to_inode(struct task_struct *p, struct inode *inode);
int security_create_user_ns(const struct cred *cred);
int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag);
void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid);
int security_msg_msg_alloc(struct msg_msg *msg); int security_msg_msg_alloc(struct msg_msg *msg);
...@@ -1194,6 +1195,11 @@ static inline int security_task_prctl(int option, unsigned long arg2, ...@@ -1194,6 +1195,11 @@ static inline int security_task_prctl(int option, unsigned long arg2,
static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
{ } { }
/*
 * Inline fallback for security_create_user_ns(): performs no check and
 * always returns 0, so user namespace creation is never refused here.
 * NOTE(review): presumably the variant used when LSM support is compiled
 * out -- confirm against the surrounding preprocessor conditionals.
 */
static inline int security_create_user_ns(const struct cred *cred)
{
return 0;
}
static inline int security_ipc_permission(struct kern_ipc_perm *ipcp, static inline int security_ipc_permission(struct kern_ipc_perm *ipcp,
short flag) short flag)
{ {
......
...@@ -335,6 +335,7 @@ BTF_ID(func, bpf_lsm_task_getsecid_obj) ...@@ -335,6 +335,7 @@ BTF_ID(func, bpf_lsm_task_getsecid_obj)
BTF_ID(func, bpf_lsm_task_prctl) BTF_ID(func, bpf_lsm_task_prctl)
BTF_ID(func, bpf_lsm_task_setscheduler) BTF_ID(func, bpf_lsm_task_setscheduler)
BTF_ID(func, bpf_lsm_task_to_inode) BTF_ID(func, bpf_lsm_task_to_inode)
BTF_ID(func, bpf_lsm_userns_create)
BTF_SET_END(sleepable_lsm_hooks) BTF_SET_END(sleepable_lsm_hooks)
bool bpf_lsm_is_sleepable_hook(u32 btf_id) bool bpf_lsm_is_sleepable_hook(u32 btf_id)
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/highuid.h> #include <linux/highuid.h>
#include <linux/cred.h> #include <linux/cred.h>
#include <linux/securebits.h> #include <linux/securebits.h>
#include <linux/security.h>
#include <linux/keyctl.h> #include <linux/keyctl.h>
#include <linux/key-type.h> #include <linux/key-type.h>
#include <keys/user-type.h> #include <keys/user-type.h>
...@@ -113,6 +114,10 @@ int create_user_ns(struct cred *new) ...@@ -113,6 +114,10 @@ int create_user_ns(struct cred *new)
!kgid_has_mapping(parent_ns, group)) !kgid_has_mapping(parent_ns, group))
goto fail_dec; goto fail_dec;
ret = security_create_user_ns(new);
if (ret < 0)
goto fail_dec;
ret = -ENOMEM; ret = -ENOMEM;
ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
if (!ns) if (!ns)
......
...@@ -63,7 +63,7 @@ static int lockdown_is_locked_down(enum lockdown_reason what) ...@@ -63,7 +63,7 @@ static int lockdown_is_locked_down(enum lockdown_reason what)
if (kernel_locked_down >= what) { if (kernel_locked_down >= what) {
if (lockdown_reasons[what]) if (lockdown_reasons[what])
pr_notice("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n", pr_notice_ratelimited("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n",
current->comm, lockdown_reasons[what]); current->comm, lockdown_reasons[what]);
return -EPERM; return -EPERM;
} }
......
...@@ -44,9 +44,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, ...@@ -44,9 +44,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
struct iphdr *ih; struct iphdr *ih;
ih = ip_hdr(skb); ih = ip_hdr(skb);
if (ih == NULL)
return -EINVAL;
ad->u.net->v4info.saddr = ih->saddr; ad->u.net->v4info.saddr = ih->saddr;
ad->u.net->v4info.daddr = ih->daddr; ad->u.net->v4info.daddr = ih->daddr;
...@@ -59,8 +56,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, ...@@ -59,8 +56,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
switch (ih->protocol) { switch (ih->protocol) {
case IPPROTO_TCP: { case IPPROTO_TCP: {
struct tcphdr *th = tcp_hdr(skb); struct tcphdr *th = tcp_hdr(skb);
if (th == NULL)
break;
ad->u.net->sport = th->source; ad->u.net->sport = th->source;
ad->u.net->dport = th->dest; ad->u.net->dport = th->dest;
...@@ -68,8 +63,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, ...@@ -68,8 +63,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
} }
case IPPROTO_UDP: { case IPPROTO_UDP: {
struct udphdr *uh = udp_hdr(skb); struct udphdr *uh = udp_hdr(skb);
if (uh == NULL)
break;
ad->u.net->sport = uh->source; ad->u.net->sport = uh->source;
ad->u.net->dport = uh->dest; ad->u.net->dport = uh->dest;
...@@ -77,8 +70,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, ...@@ -77,8 +70,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
} }
case IPPROTO_DCCP: { case IPPROTO_DCCP: {
struct dccp_hdr *dh = dccp_hdr(skb); struct dccp_hdr *dh = dccp_hdr(skb);
if (dh == NULL)
break;
ad->u.net->sport = dh->dccph_sport; ad->u.net->sport = dh->dccph_sport;
ad->u.net->dport = dh->dccph_dport; ad->u.net->dport = dh->dccph_dport;
...@@ -86,8 +77,7 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb, ...@@ -86,8 +77,7 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
} }
case IPPROTO_SCTP: { case IPPROTO_SCTP: {
struct sctphdr *sh = sctp_hdr(skb); struct sctphdr *sh = sctp_hdr(skb);
if (sh == NULL)
break;
ad->u.net->sport = sh->source; ad->u.net->sport = sh->source;
ad->u.net->dport = sh->dest; ad->u.net->dport = sh->dest;
break; break;
...@@ -115,8 +105,6 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb, ...@@ -115,8 +105,6 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb,
__be16 frag_off; __be16 frag_off;
ip6 = ipv6_hdr(skb); ip6 = ipv6_hdr(skb);
if (ip6 == NULL)
return -EINVAL;
ad->u.net->v6info.saddr = ip6->saddr; ad->u.net->v6info.saddr = ip6->saddr;
ad->u.net->v6info.daddr = ip6->daddr; ad->u.net->v6info.daddr = ip6->daddr;
/* IPv6 can have several extension header before the Transport header /* IPv6 can have several extension header before the Transport header
......
...@@ -1909,6 +1909,11 @@ void security_task_to_inode(struct task_struct *p, struct inode *inode) ...@@ -1909,6 +1909,11 @@ void security_task_to_inode(struct task_struct *p, struct inode *inode)
call_void_hook(task_to_inode, p, inode); call_void_hook(task_to_inode, p, inode);
} }
/*
 * security_create_user_ns() - run the userns_create LSM hook.
 * @cred: credentials handed through to the hook implementations.
 *
 * Dispatches to the registered userns_create hooks via call_int_hook(),
 * using 0 as the default value, and returns the resulting code.
 */
int security_create_user_ns(const struct cred *cred)
{
	int rc;

	rc = call_int_hook(userns_create, 0, cred);

	return rc;
}
int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag) int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
{ {
return call_int_hook(ipc_permission, 0, ipcp, flag); return call_int_hook(ipc_permission, 0, ipcp, flag);
......
...@@ -4222,6 +4222,14 @@ static void selinux_task_to_inode(struct task_struct *p, ...@@ -4222,6 +4222,14 @@ static void selinux_task_to_inode(struct task_struct *p,
spin_unlock(&isec->lock); spin_unlock(&isec->lock);
} }
/*
 * SELinux implementation of the userns_create hook.
 *
 * Asks the AVC whether the current task's SID holds the
 * USER_NAMESPACE__CREATE permission of class SECCLASS_USER_NAMESPACE
 * against itself (source and target SID are both current_sid()).
 * @cred is not consulted by this check.
 *
 * Returns whatever avc_has_perm() returns.
 */
static int selinux_userns_create(const struct cred *cred)
{
	const u32 self_sid = current_sid();
	int rc;

	rc = avc_has_perm(&selinux_state, self_sid, self_sid,
			  SECCLASS_USER_NAMESPACE, USER_NAMESPACE__CREATE,
			  NULL);

	return rc;
}
/* Returns error only if unable to parse addresses */ /* Returns error only if unable to parse addresses */
static int selinux_parse_skb_ipv4(struct sk_buff *skb, static int selinux_parse_skb_ipv4(struct sk_buff *skb,
struct common_audit_data *ad, u8 *proto) struct common_audit_data *ad, u8 *proto)
...@@ -7128,6 +7136,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { ...@@ -7128,6 +7136,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(task_movememory, selinux_task_movememory), LSM_HOOK_INIT(task_movememory, selinux_task_movememory),
LSM_HOOK_INIT(task_kill, selinux_task_kill), LSM_HOOK_INIT(task_kill, selinux_task_kill),
LSM_HOOK_INIT(task_to_inode, selinux_task_to_inode), LSM_HOOK_INIT(task_to_inode, selinux_task_to_inode),
LSM_HOOK_INIT(userns_create, selinux_userns_create),
LSM_HOOK_INIT(ipc_permission, selinux_ipc_permission), LSM_HOOK_INIT(ipc_permission, selinux_ipc_permission),
LSM_HOOK_INIT(ipc_getsecid, selinux_ipc_getsecid), LSM_HOOK_INIT(ipc_getsecid, selinux_ipc_getsecid),
......
...@@ -254,6 +254,8 @@ const struct security_class_mapping secclass_map[] = { ...@@ -254,6 +254,8 @@ const struct security_class_mapping secclass_map[] = {
{ COMMON_FILE_PERMS, NULL } }, { COMMON_FILE_PERMS, NULL } },
{ "io_uring", { "io_uring",
{ "override_creds", "sqpoll", "cmd", NULL } }, { "override_creds", "sqpoll", "cmd", NULL } },
{ "user_namespace",
{ "create", NULL } },
{ NULL } { NULL }
}; };
......
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <test_progs.h>
#include "test_deny_namespace.skel.h"
#include <sched.h>
#include "cap_helpers.h"
#include <stdio.h>
/*
 * Reap child @pid, retrying whenever waitpid() is interrupted (EINTR).
 *
 * Returns the child's exit status if it exited normally, or -1 if
 * waitpid() failed for any other reason or the child did not terminate
 * through a normal exit.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	for (;;) {
		if (waitpid(pid, &wstatus, 0) != -1)
			break;
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
/*
 * Attempt unshare(CLONE_NEWUSER) in a forked child and report the outcome
 * through the child's exit status.
 *
 * negative return value -> some internal error
 * positive return value -> userns creation failed
 * 0 -> userns creation succeeded
 */
static int create_user_ns(void)
{
	pid_t child = fork();

	if (child < 0)
		return -1;

	if (child > 0)
		return wait_for_pid(child);

	/* Child: the exit status is the only channel back to the parent. */
	_exit(unshare(CLONE_NEWUSER) ? EXIT_FAILURE : EXIT_SUCCESS);
}
/*
 * Subtest run while the BPF LSM program is attached: creating a user
 * namespace must succeed with CAP_SYS_ADMIN effective and must fail once
 * the capability has been dropped.
 *
 * create_user_ns() reports a failed unshare() as the child's exit status
 * (EXIT_FAILURE), not as an errno; the comparison against EPERM below holds
 * because both have the value 1 on Linux.
 */
static void test_userns_create_bpf(void)
{
	__u64 cap_mask = 1ULL << CAP_SYS_ADMIN;
	/* Presumably filled with the effective set as it was on entry. */
	__u64 old_caps = 0;
	/* Throw-away output for the disable call, see below. */
	__u64 scratch_caps = 0;

	cap_enable_effective(cap_mask, &old_caps);

	ASSERT_OK(create_user_ns(), "priv new user ns");

	/*
	 * Do not hand old_caps to this call: it would be overwritten with the
	 * state after the enable above, and the restore check at the end would
	 * then always re-enable CAP_SYS_ADMIN regardless of the entry state.
	 */
	cap_disable_effective(cap_mask, &scratch_caps);

	ASSERT_EQ(create_user_ns(), EPERM, "unpriv new user ns");

	/* Restore the capability only if it was effective on entry. */
	if (cap_mask & old_caps)
		cap_enable_effective(cap_mask, NULL);
}
/*
 * Baseline subtest: with CAP_SYS_ADMIN dropped from the effective set,
 * creating a user namespace is expected to succeed. Afterwards the
 * capability is re-enabled if the saved set shows it was effective.
 */
static void test_unpriv_userns_create_no_bpf(void)
{
	const __u32 sys_admin = 1ULL << CAP_SYS_ADMIN;
	__u64 saved_caps = 0;
	int rc;

	cap_disable_effective(sys_admin, &saved_caps);

	rc = create_user_ns();
	ASSERT_OK(rc, "no-bpf unpriv new user ns");

	if (saved_caps & sys_admin)
		cap_enable_effective(sys_admin, NULL);
}
/*
 * Test entry point.
 *
 * Runs the no-BPF baseline subtest first, then opens, loads and attaches
 * the test_deny_namespace skeleton and runs the BPF-enforced subtest.
 * The skeleton is destroyed on every path, including load/attach failure.
 */
void test_deny_namespace(void)
{
	struct test_deny_namespace *skel;

	if (test__start_subtest("unpriv_userns_create_no_bpf"))
		test_unpriv_userns_create_no_bpf();

	skel = test_deny_namespace__open_and_load();
	if (ASSERT_OK_PTR(skel, "skel load") &&
	    ASSERT_OK(test_deny_namespace__attach(skel), "attach")) {
		if (test__start_subtest("userns_create_bpf"))
			test_userns_create_bpf();

		test_deny_namespace__detach(skel);
	}

	test_deny_namespace__destroy(skel);
}
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <errno.h>
#include <linux/capability.h>
/*
 * Local declaration of the capability-set type: an array of
 * _LINUX_CAPABILITY_U32S_3 32-bit words.
 * NOTE(review): preserve_access_index presumably makes the compiler emit
 * relocatable field accesses so the loader can match the running kernel's
 * layout -- confirm against the clang/libbpf CO-RE documentation.
 */
struct kernel_cap_struct {
__u32 cap[_LINUX_CAPABILITY_U32S_3];
} __attribute__((preserve_access_index));
/*
 * Local declaration of struct cred carrying only cap_effective, the single
 * member the program in this file reads.
 */
struct cred {
struct kernel_cap_struct cap_effective;
} __attribute__((preserve_access_index));
/* License string, placed in the "license" section. */
char _license[] SEC("license") = "GPL";
/*
 * Sleepable LSM program attached to the userns_create hook.
 *
 * @cred: credentials passed to the hook; only cap_effective is read.
 * @ret:  return value supplied to the program alongside the hook arguments.
 *        NOTE(review): presumably the verdict accumulated so far for this
 *        hook -- confirm against the BPF LSM documentation.
 *
 * Returns 0 when @ret is already non-zero or when CAP_SYS_ADMIN is set in
 * the effective capability set, and -EPERM otherwise.
 */
SEC("lsm.s/userns_create")
int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
{
	struct kernel_cap_struct caps = cred->cap_effective;
	int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
	__u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);

	if (ret)
		return 0;

	/* Allow only when CAP_SYS_ADMIN is in the effective set. */
	if (caps.cap[cap_index] & cap_mask)
		return 0;

	return -EPERM;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment