Commit 2798b80b authored by David S. Miller's avatar David S. Miller

Merge branch 'eBPF-based-device-cgroup-controller'

Roman Gushchin says:

====================
eBPF-based device cgroup controller

This patchset introduces an eBPF-based device controller for cgroup v2.

Patches (1) and (2) are preparatory work required to share some code
  with the existing device controller implementation.
Patch (3) is the main patch, which introduces a new bpf prog type
  and all necessary infrastructure.
Patch (4) moves cgroup_helpers.c/h so that they can be used by patch (5).
Patch (5) implements an example eBPF program which controls access
  to device files, and a corresponding userspace test.

v3:
  Renamed constants introduced by patch (3) to BPF_DEVCG_*

v2:
  Added patch (1).

v1:
  https://lkml.org/lkml/2017/11/1/363
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 488e5b30 37f1ba09
...@@ -67,6 +67,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, ...@@ -67,6 +67,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops, struct bpf_sock_ops_kern *sock_ops,
enum bpf_attach_type type); enum bpf_attach_type type);
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
short access, enum bpf_attach_type type);
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \ ({ \
...@@ -112,6 +115,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, ...@@ -112,6 +115,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
} \ } \
__ret; \ __ret; \
}) })
/*
 * Run the BPF_CGROUP_DEVICE programs for a device access request.
 *
 * Evaluates to 0 when cgroup_bpf_enabled is false; otherwise evaluates to
 * whatever __cgroup_bpf_check_dev_permission() returns for the request.
 */
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access)	      \
({									      \
	int __devcg_ret = cgroup_bpf_enabled ?				      \
		__cgroup_bpf_check_dev_permission(type, major, minor,	      \
						  access,		      \
						  BPF_CGROUP_DEVICE) : 0;     \
									      \
	__devcg_ret;							      \
})
#else #else
struct cgroup_bpf {}; struct cgroup_bpf {};
...@@ -122,6 +136,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } ...@@ -122,6 +136,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
#endif /* CONFIG_CGROUP_BPF */ #endif /* CONFIG_CGROUP_BPF */
......
...@@ -19,6 +19,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) ...@@ -19,6 +19,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
#endif #endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
......
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/bpf-cgroup.h>
#define DEVCG_ACC_MKNOD 1
#define DEVCG_ACC_READ 2
#define DEVCG_ACC_WRITE 4
#define DEVCG_ACC_MASK (DEVCG_ACC_MKNOD | DEVCG_ACC_READ | DEVCG_ACC_WRITE)
#define DEVCG_DEV_BLOCK 1
#define DEVCG_DEV_CHAR 2
#define DEVCG_DEV_ALL 4 /* this represents all devices */
#ifdef CONFIG_CGROUP_DEVICE #ifdef CONFIG_CGROUP_DEVICE
extern int __devcgroup_inode_permission(struct inode *inode, int mask); extern int __devcgroup_check_permission(short type, u32 major, u32 minor,
extern int devcgroup_inode_mknod(int mode, dev_t dev); short access);
#else
static inline int __devcgroup_check_permission(short type, u32 major, u32 minor,
short access)
{ return 0; }
#endif
#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
/*
 * Check a device access request against both device controllers.
 *
 * The BPF_CGROUP_DEVICE programs are consulted first; any non-zero result
 * from them is reported as -EPERM. Otherwise the decision is delegated to
 * __devcgroup_check_permission() and its return value is passed through.
 */
static inline int devcgroup_check_permission(short type, u32 major, u32 minor,
					     short access)
{
	if (BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access))
		return -EPERM;

	return __devcgroup_check_permission(type, major, minor, access);
}
static inline int devcgroup_inode_permission(struct inode *inode, int mask) static inline int devcgroup_inode_permission(struct inode *inode, int mask)
{ {
short type, access = 0;
if (likely(!inode->i_rdev)) if (likely(!inode->i_rdev))
return 0; return 0;
if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))
if (S_ISBLK(inode->i_mode))
type = DEVCG_DEV_BLOCK;
else if (S_ISCHR(inode->i_mode))
type = DEVCG_DEV_CHAR;
else
return 0;
if (mask & MAY_WRITE)
access |= DEVCG_ACC_WRITE;
if (mask & MAY_READ)
access |= DEVCG_ACC_READ;
return devcgroup_check_permission(type, imajor(inode), iminor(inode),
access);
}
static inline int devcgroup_inode_mknod(int mode, dev_t dev)
{
short type;
if (!S_ISBLK(mode) && !S_ISCHR(mode))
return 0; return 0;
return __devcgroup_inode_permission(inode, mask);
if (S_ISBLK(mode))
type = DEVCG_DEV_BLOCK;
else
type = DEVCG_DEV_CHAR;
return devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
DEVCG_ACC_MKNOD);
} }
#else #else
static inline int devcgroup_inode_permission(struct inode *inode, int mask) static inline int devcgroup_inode_permission(struct inode *inode, int mask)
{ return 0; } { return 0; }
......
...@@ -132,6 +132,7 @@ enum bpf_prog_type { ...@@ -132,6 +132,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_LWT_XMIT,
BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_CGROUP_DEVICE,
}; };
enum bpf_attach_type { enum bpf_attach_type {
...@@ -141,6 +142,7 @@ enum bpf_attach_type { ...@@ -141,6 +142,7 @@ enum bpf_attach_type {
BPF_CGROUP_SOCK_OPS, BPF_CGROUP_SOCK_OPS,
BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT, BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
__MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
}; };
...@@ -991,4 +993,17 @@ struct bpf_perf_event_value { ...@@ -991,4 +993,17 @@ struct bpf_perf_event_value {
__u64 running; __u64 running;
}; };
/* Access-kind bits, carried in the upper 16 bits of access_type below. */
#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
#define BPF_DEVCG_ACC_READ (1ULL << 1)
#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
/* Device-type values, carried in the lower 16 bits of access_type below. */
#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
/*
 * Context handed to a device cgroup BPF program: what kind of access is
 * being requested and the major/minor numbers of the device concerned.
 */
struct bpf_cgroup_dev_ctx {
__u32 access_type; /* (access << 16) | type */
__u32 major;
__u32 minor;
};
#endif /* _UAPI__LINUX_BPF_H__ */ #endif /* _UAPI__LINUX_BPF_H__ */
...@@ -522,3 +522,70 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, ...@@ -522,3 +522,70 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
return ret == 1 ? 0 : -EPERM; return ret == 1 ? 0 : -EPERM;
} }
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
/**
 * __cgroup_bpf_check_dev_permission() - run device cgroup BPF programs
 * @dev_type: device type, stored in the low 16 bits of the context word
 * @major: device major number
 * @minor: device minor number
 * @access: requested access bits, stored in the high 16 bits
 * @type: attach type selecting which effective program array is run
 *
 * Builds a struct bpf_cgroup_dev_ctx for the request and runs the effective
 * programs of the current task's default-hierarchy cgroup under the RCU
 * read lock.
 *
 * Return: 0 if the program array result is non-zero, 1 if it is zero.
 */
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum bpf_attach_type type)
{
	struct bpf_cgroup_dev_ctx ctx;
	struct cgroup *cgrp;
	int allow;

	ctx.access_type = (access << 16) | dev_type;
	ctx.major = major;
	ctx.minor = minor;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
				   BPF_PROG_RUN);
	rcu_read_unlock();

	/* Callers treat a non-zero return as "denied". */
	return allow ? 0 : 1;
}
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
/*
 * Map a helper id to its prototype for cgroup device programs.
 *
 * Only the helpers listed here are handed out; every other id yields NULL.
 * bpf_trace_printk is additionally gated on CAP_SYS_ADMIN.
 */
static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id)
{
	const struct bpf_func_proto *proto = NULL;

	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		proto = &bpf_map_lookup_elem_proto;
		break;
	case BPF_FUNC_map_update_elem:
		proto = &bpf_map_update_elem_proto;
		break;
	case BPF_FUNC_map_delete_elem:
		proto = &bpf_map_delete_elem_proto;
		break;
	case BPF_FUNC_get_current_uid_gid:
		proto = &bpf_get_current_uid_gid_proto;
		break;
	case BPF_FUNC_trace_printk:
		/* Without the capability the result stays NULL. */
		if (capable(CAP_SYS_ADMIN))
			proto = bpf_get_trace_printk_proto();
		break;
	default:
		break;
	}

	return proto;
}
/*
 * Decide whether a context access at byte offset @off of width @size is
 * acceptable for a cgroup device program.
 *
 * Accepted accesses are reads only, lying entirely inside
 * struct bpf_cgroup_dev_ctx, aligned to their own size, and exactly
 * sizeof(__u32) wide. @info is not used.
 */
static bool cgroup_dev_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       struct bpf_insn_access_aux *info)
{
	/*
	 * The checks are evaluated left to right and stop at the first
	 * failure. The verifier guarantees that size > 0, so the modulo
	 * below never divides by zero.
	 */
	return type != BPF_WRITE &&
	       off >= 0 &&
	       off + size <= sizeof(struct bpf_cgroup_dev_ctx) &&
	       off % size == 0 &&
	       size == sizeof(__u32);
}
/* Cgroup device programs define no program-level operations. */
const struct bpf_prog_ops cg_dev_prog_ops = {};

/* Verifier hooks: context access validation and the helper lookup. */
const struct bpf_verifier_ops cg_dev_verifier_ops = {
	.is_valid_access	= cgroup_dev_is_valid_access,
	.get_func_proto		= cgroup_dev_func_proto,
};
...@@ -1326,6 +1326,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) ...@@ -1326,6 +1326,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS; ptype = BPF_PROG_TYPE_SOCK_OPS;
break; break;
case BPF_CGROUP_DEVICE:
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT: case BPF_SK_SKB_STREAM_VERDICT:
return sockmap_get_from_fd(attr, true); return sockmap_get_from_fd(attr, true);
...@@ -1378,6 +1381,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) ...@@ -1378,6 +1381,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS; ptype = BPF_PROG_TYPE_SOCK_OPS;
break; break;
case BPF_CGROUP_DEVICE:
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT: case BPF_SK_SKB_STREAM_VERDICT:
return sockmap_get_from_fd(attr, false); return sockmap_get_from_fd(attr, false);
...@@ -1420,6 +1426,7 @@ static int bpf_prog_query(const union bpf_attr *attr, ...@@ -1420,6 +1426,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET_EGRESS: case BPF_CGROUP_INET_EGRESS:
case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
break; break;
default: default:
return -EINVAL; return -EINVAL;
......
...@@ -3124,6 +3124,7 @@ static int check_return_code(struct bpf_verifier_env *env) ...@@ -3124,6 +3124,7 @@ static int check_return_code(struct bpf_verifier_env *env)
case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
break; break;
default: default:
return 0; return 0;
......
...@@ -46,6 +46,7 @@ hostprogs-y += syscall_tp ...@@ -46,6 +46,7 @@ hostprogs-y += syscall_tp
# Libbpf dependencies # Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o LIBBPF := ../../tools/lib/bpf/bpf.o
CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
test_lru_dist-objs := test_lru_dist.o $(LIBBPF) test_lru_dist-objs := test_lru_dist.o $(LIBBPF)
sock_example-objs := sock_example.o $(LIBBPF) sock_example-objs := sock_example.o $(LIBBPF)
...@@ -69,13 +70,13 @@ map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o ...@@ -69,13 +70,13 @@ map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o
test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o
test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o
test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o test_cgrp2_attach-objs := $(LIBBPF) test_cgrp2_attach.o
test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o cgroup_helpers.o test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o $(CGROUP_HELPERS)
test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o
test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o
xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o
# reuse xdp1 source intentionally # reuse xdp1 source intentionally
xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \ test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \
test_current_task_under_cgroup_user.o test_current_task_under_cgroup_user.o
trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o
......
...@@ -15,15 +15,6 @@ ...@@ -15,15 +15,6 @@
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#define ACC_MKNOD 1
#define ACC_READ 2
#define ACC_WRITE 4
#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE)
#define DEV_BLOCK 1
#define DEV_CHAR 2
#define DEV_ALL 4 /* this represents all devices */
static DEFINE_MUTEX(devcgroup_mutex); static DEFINE_MUTEX(devcgroup_mutex);
enum devcg_behavior { enum devcg_behavior {
...@@ -246,21 +237,21 @@ static void set_access(char *acc, short access) ...@@ -246,21 +237,21 @@ static void set_access(char *acc, short access)
{ {
int idx = 0; int idx = 0;
memset(acc, 0, ACCLEN); memset(acc, 0, ACCLEN);
if (access & ACC_READ) if (access & DEVCG_ACC_READ)
acc[idx++] = 'r'; acc[idx++] = 'r';
if (access & ACC_WRITE) if (access & DEVCG_ACC_WRITE)
acc[idx++] = 'w'; acc[idx++] = 'w';
if (access & ACC_MKNOD) if (access & DEVCG_ACC_MKNOD)
acc[idx++] = 'm'; acc[idx++] = 'm';
} }
static char type_to_char(short type) static char type_to_char(short type)
{ {
if (type == DEV_ALL) if (type == DEVCG_DEV_ALL)
return 'a'; return 'a';
if (type == DEV_CHAR) if (type == DEVCG_DEV_CHAR)
return 'c'; return 'c';
if (type == DEV_BLOCK) if (type == DEVCG_DEV_BLOCK)
return 'b'; return 'b';
return 'X'; return 'X';
} }
...@@ -287,10 +278,10 @@ static int devcgroup_seq_show(struct seq_file *m, void *v) ...@@ -287,10 +278,10 @@ static int devcgroup_seq_show(struct seq_file *m, void *v)
* This way, the file remains as a "whitelist of devices" * This way, the file remains as a "whitelist of devices"
*/ */
if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
set_access(acc, ACC_MASK); set_access(acc, DEVCG_ACC_MASK);
set_majmin(maj, ~0); set_majmin(maj, ~0);
set_majmin(min, ~0); set_majmin(min, ~0);
seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL), seq_printf(m, "%c %s:%s %s\n", type_to_char(DEVCG_DEV_ALL),
maj, min, acc); maj, min, acc);
} else { } else {
list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) { list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
...@@ -309,10 +300,10 @@ static int devcgroup_seq_show(struct seq_file *m, void *v) ...@@ -309,10 +300,10 @@ static int devcgroup_seq_show(struct seq_file *m, void *v)
/** /**
* match_exception - iterates the exception list trying to find a complete match * match_exception - iterates the exception list trying to find a complete match
* @exceptions: list of exceptions * @exceptions: list of exceptions
* @type: device type (DEV_BLOCK or DEV_CHAR) * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR)
* @major: device file major number, ~0 to match all * @major: device file major number, ~0 to match all
* @minor: device file minor number, ~0 to match all * @minor: device file minor number, ~0 to match all
* @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD) * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD)
* *
* It is considered a complete match if an exception is found that will * It is considered a complete match if an exception is found that will
* contain the entire range of provided parameters. * contain the entire range of provided parameters.
...@@ -325,9 +316,9 @@ static bool match_exception(struct list_head *exceptions, short type, ...@@ -325,9 +316,9 @@ static bool match_exception(struct list_head *exceptions, short type,
struct dev_exception_item *ex; struct dev_exception_item *ex;
list_for_each_entry_rcu(ex, exceptions, list) { list_for_each_entry_rcu(ex, exceptions, list) {
if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK))
continue; continue;
if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR)) if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR))
continue; continue;
if (ex->major != ~0 && ex->major != major) if (ex->major != ~0 && ex->major != major)
continue; continue;
...@@ -344,10 +335,10 @@ static bool match_exception(struct list_head *exceptions, short type, ...@@ -344,10 +335,10 @@ static bool match_exception(struct list_head *exceptions, short type,
/** /**
* match_exception_partial - iterates the exception list trying to find a partial match * match_exception_partial - iterates the exception list trying to find a partial match
* @exceptions: list of exceptions * @exceptions: list of exceptions
* @type: device type (DEV_BLOCK or DEV_CHAR) * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR)
* @major: device file major number, ~0 to match all * @major: device file major number, ~0 to match all
* @minor: device file minor number, ~0 to match all * @minor: device file minor number, ~0 to match all
* @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD) * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD)
* *
* It is considered a partial match if an exception's range is found to * It is considered a partial match if an exception's range is found to
* contain *any* of the devices specified by provided parameters. This is * contain *any* of the devices specified by provided parameters. This is
...@@ -362,9 +353,9 @@ static bool match_exception_partial(struct list_head *exceptions, short type, ...@@ -362,9 +353,9 @@ static bool match_exception_partial(struct list_head *exceptions, short type,
struct dev_exception_item *ex; struct dev_exception_item *ex;
list_for_each_entry_rcu(ex, exceptions, list) { list_for_each_entry_rcu(ex, exceptions, list) {
if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK))
continue; continue;
if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR)) if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR))
continue; continue;
/* /*
* We must be sure that both the exception and the provided * We must be sure that both the exception and the provided
...@@ -647,10 +638,10 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, ...@@ -647,10 +638,10 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
} }
return 0; return 0;
case 'b': case 'b':
ex.type = DEV_BLOCK; ex.type = DEVCG_DEV_BLOCK;
break; break;
case 'c': case 'c':
ex.type = DEV_CHAR; ex.type = DEVCG_DEV_CHAR;
break; break;
default: default:
return -EINVAL; return -EINVAL;
...@@ -703,13 +694,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, ...@@ -703,13 +694,13 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
for (b++, count = 0; count < 3; count++, b++) { for (b++, count = 0; count < 3; count++, b++) {
switch (*b) { switch (*b) {
case 'r': case 'r':
ex.access |= ACC_READ; ex.access |= DEVCG_ACC_READ;
break; break;
case 'w': case 'w':
ex.access |= ACC_WRITE; ex.access |= DEVCG_ACC_WRITE;
break; break;
case 'm': case 'm':
ex.access |= ACC_MKNOD; ex.access |= DEVCG_ACC_MKNOD;
break; break;
case '\n': case '\n':
case '\0': case '\0':
...@@ -806,12 +797,12 @@ struct cgroup_subsys devices_cgrp_subsys = { ...@@ -806,12 +797,12 @@ struct cgroup_subsys devices_cgrp_subsys = {
* @type: device type * @type: device type
* @major: device major number * @major: device major number
* @minor: device minor number * @minor: device minor number
* @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD * @access: combination of DEVCG_ACC_WRITE, DEVCG_ACC_READ and DEVCG_ACC_MKNOD
* *
* returns 0 on success, -EPERM case the operation is not permitted * returns 0 on success, -EPERM case the operation is not permitted
*/ */
static int __devcgroup_check_permission(short type, u32 major, u32 minor, int __devcgroup_check_permission(short type, u32 major, u32 minor,
short access) short access)
{ {
struct dev_cgroup *dev_cgroup; struct dev_cgroup *dev_cgroup;
bool rc; bool rc;
...@@ -833,37 +824,3 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor, ...@@ -833,37 +824,3 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor,
return 0; return 0;
} }
int __devcgroup_inode_permission(struct inode *inode, int mask)
{
short type, access = 0;
if (S_ISBLK(inode->i_mode))
type = DEV_BLOCK;
if (S_ISCHR(inode->i_mode))
type = DEV_CHAR;
if (mask & MAY_WRITE)
access |= ACC_WRITE;
if (mask & MAY_READ)
access |= ACC_READ;
return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
access);
}
int devcgroup_inode_mknod(int mode, dev_t dev)
{
short type;
if (!S_ISBLK(mode) && !S_ISCHR(mode))
return 0;
if (S_ISBLK(mode))
type = DEV_BLOCK;
else
type = DEV_CHAR;
return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
ACC_MKNOD);
}
...@@ -131,6 +131,7 @@ enum bpf_prog_type { ...@@ -131,6 +131,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_LWT_XMIT,
BPF_PROG_TYPE_SOCK_OPS, BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_CGROUP_DEVICE,
}; };
enum bpf_attach_type { enum bpf_attach_type {
...@@ -140,6 +141,7 @@ enum bpf_attach_type { ...@@ -140,6 +141,7 @@ enum bpf_attach_type {
BPF_CGROUP_SOCK_OPS, BPF_CGROUP_SOCK_OPS,
BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT, BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
__MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
}; };
...@@ -990,4 +992,17 @@ struct bpf_perf_event_value { ...@@ -990,4 +992,17 @@ struct bpf_perf_event_value {
__u64 running; __u64 running;
}; };
/* Access-kind bits, carried in the upper 16 bits of access_type below. */
#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
#define BPF_DEVCG_ACC_READ (1ULL << 1)
#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
/* Device-type values, carried in the lower 16 bits of access_type below. */
#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
/*
 * Context handed to a device cgroup BPF program: what kind of access is
 * being requested and the major/minor numbers of the device concerned.
 */
struct bpf_cgroup_dev_ctx {
__u32 access_type; /* (access << 16) | type */
__u32 major;
__u32 minor;
};
#endif /* _UAPI__LINUX_BPF_H__ */ #endif /* _UAPI__LINUX_BPF_H__ */
...@@ -13,17 +13,17 @@ CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../i ...@@ -13,17 +13,17 @@ CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../i
LDLIBS += -lcap -lelf LDLIBS += -lcap -lelf
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_align test_verifier_log test_dev_cgroup
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o sockmap_verdict_prog.o dev_cgroup.o
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh
include ../lib.mk include ../lib.mk
BPFOBJ := $(OUTPUT)/libbpf.a BPFOBJ := $(OUTPUT)/libbpf.a $(OUTPUT)/cgroup_helpers.c
$(TEST_GEN_PROGS): $(BPFOBJ) $(TEST_GEN_PROGS): $(BPFOBJ)
......
/* Copyright (c) 2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
#include <linux/version.h>
#include "bpf_helpers.h"
/*
 * Example device cgroup program.
 *
 * Returns 1 for character devices 1:5 and 1:9 and 0 for every other
 * request. When built with DEBUG it also logs each request through
 * bpf_trace_printk() as "<type> <major>:<minor> <rwm>".
 */
SEC("cgroup/dev")
int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
{
	/* access_type is (access << 16) | type. */
	short type = ctx->access_type & 0xFFFF;
#ifdef DEBUG
	short access = ctx->access_type >> 16;
	/*
	 * Byte layout of the format string:
	 *   [0]      device type letter, filled in below
	 *   [2..6]   "%d:%d"
	 *   [8..10]  'r' / 'w' / 'm' flags, blank when not requested
	 *   [11]     newline
	 * The padding must stay in place: the stores to fmt[8..10] below
	 * rely on those bytes existing before the terminating NUL.
	 */
	char fmt[] = "  %d:%d    \n";

	switch (type) {
	case BPF_DEVCG_DEV_BLOCK:
		fmt[0] = 'b';
		break;
	case BPF_DEVCG_DEV_CHAR:
		fmt[0] = 'c';
		break;
	default:
		fmt[0] = '?';
		break;
	}

	if (access & BPF_DEVCG_ACC_READ)
		fmt[8] = 'r';

	if (access & BPF_DEVCG_ACC_WRITE)
		fmt[9] = 'w';

	if (access & BPF_DEVCG_ACC_MKNOD)
		fmt[10] = 'm';

	bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor);
#endif

	/* Allow access to /dev/zero and /dev/urandom.
	 * Forbid everything else.
	 */
	if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR)
		return 0;

	switch (ctx->minor) {
	case 5: /* 1:5 /dev/zero */
	case 9: /* 1:9 /dev/urandom */
		return 1;
	}

	return 0;
}
char _license[] SEC("license") = "GPL";
__u32 _version SEC("version") = LINUX_VERSION_CODE;
/* Copyright (c) 2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
#define DEV_CGROUP_PROG "./dev_cgroup.o"
#define TEST_CGROUP "test-bpf-based-device-cgroup/"
/*
 * Userspace driver for the device cgroup BPF example.
 *
 * Loads DEV_CGROUP_PROG, creates and joins TEST_CGROUP, attaches the program
 * with BPF_CGROUP_DEVICE, then checks through shell commands that only
 * /dev/zero and /dev/urandom are usable from inside the cgroup.
 *
 * Returns 0 on success and EXIT_FAILURE if any setup step fails. A failed
 * expectation aborts through assert().
 */
int main(int argc, char **argv)
{
	struct bpf_object *obj;
	int error = EXIT_FAILURE;
	int prog_fd, cgroup_fd;
	__u32 prog_cnt;

	if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
			  &obj, &prog_fd)) {
		printf("Failed to load DEV_CGROUP program\n");
		goto err;
	}

	if (setup_cgroup_environment()) {
		printf("Failed to setup cgroup environment\n");
		goto err;
	}

	/* Create a cgroup, get fd, and join it */
	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
	/* NOTE(review): assumes the helper reports failure as 0 - confirm. */
	if (!cgroup_fd) {
		printf("Failed to create test cgroup\n");
		goto err;
	}

	if (join_cgroup(TEST_CGROUP)) {
		printf("Failed to join cgroup\n");
		goto err;
	}

	/* Attach bpf program */
	if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_DEVICE, 0)) {
		printf("Failed to attach DEV_CGROUP program\n");
		goto err;
	}

	if (bpf_prog_query(cgroup_fd, BPF_CGROUP_DEVICE, 0, NULL, NULL,
			   &prog_cnt)) {
		printf("Failed to query attached programs\n");
		goto err;
	}

	/* All operations with /dev/zero and /dev/urandom are allowed,
	 * everything else is forbidden.
	 *
	 * The commands below run inside assert(), so this file must not be
	 * built with NDEBUG: that would compile the checks out entirely.
	 */
	assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
	assert(system("mknod /tmp/test_dev_cgroup_null c 1 3"));
	assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);

	/* /dev/zero is whitelisted */
	assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
	assert(system("mknod /tmp/test_dev_cgroup_zero c 1 5") == 0);
	assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);

	assert(system("dd if=/dev/urandom of=/dev/zero count=64") == 0);

	/* src is allowed, target is forbidden */
	assert(system("dd if=/dev/urandom of=/dev/full count=64"));

	/* src is forbidden, target is allowed */
	assert(system("dd if=/dev/random of=/dev/zero count=64"));

	error = 0;
	printf("test_dev_cgroup:PASS\n");

err:
	cleanup_cgroup_environment();

	return error;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment