Commit a43d0508 authored by Alexei Starovoitov

Merge branch 'bpf-sysctl-hook'

Andrey Ignatov says:

====================
v2->v3:
- simplify C based selftests by relying on variable offset stack access.

v1->v2:
- add fs/proc/proc_sysctl.c maintainers to Cc:.

The patch set introduces new BPF hook for sysctl.

It adds new program type BPF_PROG_TYPE_CGROUP_SYSCTL and attach type
BPF_CGROUP_SYSCTL.

BPF_CGROUP_SYSCTL hook is placed before calling to sysctl's proc_handler so
that accesses (read/write) to sysctl can be controlled for specific cgroup
and either allowed or denied, or traced.

The hook has access to sysctl name, current sysctl value and (on write
only) to new sysctl value via corresponding helpers. New sysctl value can
be overridden by program. Both name and values (current/new) are
represented as strings same way they're visible in /proc/sys/. It is up to
program to parse these strings.

To help with parsing the most common kind of sysctl value, vector of
integers, two new helpers are provided: bpf_strtol and bpf_strtoul with
semantic similar to user space strtol(3) and strtoul(3).

The hook also provides bpf_sysctl context with two fields:
* @write indicates whether sysctl is being read (= 0) or written (= 1);
* @file_pos is sysctl file position to read from or write to, can be
  overridden.

The hook makes it possible to better isolate containerized applications
that run as root, so that one container can't change a sysctl and affect
all other containers on a host; to make changes to allowed sysctls in a
safer way; and to simplify sysctl tracing for cgroups.

Patch 1 is preliminary refactoring.
Patch 2 adds new program and attach types.
Patches 3-5 implement helpers to access sysctl name and value.
Patch 6 adds file_pos field to bpf_sysctl context.
Patch 7 updates UAPI in tools.
Patches 8-9 add support for the new hook to libbpf and corresponding test.
Patches 10-14 add selftests for the new hook.
Patch 15 adds support for new arg types to verifier: pointer to integer.
Patch 16 adds bpf_strto{l,ul} helpers to parse integers from sysctl value.
Patch 17 updates UAPI in tools.
Patch 18 updates bpf_helpers.h.
Patch 19 adds selftests for pointer to integer in verifier.
Patches 20-21 add selftests for bpf_strto{l,ul}, including integration
              C based test for sysctl value parsing.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents e0a092eb 7568f4cb
......@@ -13,6 +13,7 @@
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
......@@ -569,8 +570,8 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
struct inode *inode = file_inode(filp);
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
void *new_buf = NULL;
ssize_t error;
size_t res;
if (IS_ERR(head))
return PTR_ERR(head);
......@@ -588,11 +589,27 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
if (!table->proc_handler)
goto out;
error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
ppos, &new_buf);
if (error)
goto out;
/* careful: calling conventions are nasty here */
res = count;
error = table->proc_handler(table, write, buf, &res, ppos);
if (new_buf) {
mm_segment_t old_fs;
old_fs = get_fs();
set_fs(KERNEL_DS);
error = table->proc_handler(table, write, (void __user *)new_buf,
&count, ppos);
set_fs(old_fs);
kfree(new_buf);
} else {
error = table->proc_handler(table, write, buf, &count, ppos);
}
if (!error)
error = res;
error = count;
out:
sysctl_head_finish(head);
......
......@@ -17,6 +17,8 @@ struct bpf_map;
struct bpf_prog;
struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
struct ctl_table;
struct ctl_table_header;
#ifdef CONFIG_CGROUP_BPF
......@@ -109,6 +111,12 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
short access, enum bpf_attach_type type);
/* Out-of-line worker behind BPF_CGROUP_RUN_PROG_SYSCTL(): runs the cgroup BPF
 * programs of attach type @type for one sysctl access. Returns 0 when the
 * access is allowed and -EPERM otherwise. On a write, *new_buf may be set to
 * a kernel buffer holding a program-supplied value (with *pcount updated to
 * its length); the caller is responsible for freeing it.
 */
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
void __user *buf, size_t *pcount,
loff_t *ppos, void **new_buf,
enum bpf_attach_type type);
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
struct bpf_map *map)
{
......@@ -253,6 +261,18 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
\
__ret; \
})
/* Run the BPF_CGROUP_SYSCTL programs for a sysctl access.
 *
 * Evaluates to 0 without calling the filter when cgroup_bpf_enabled is
 * false; otherwise evaluates to the return value of
 * __cgroup_bpf_run_filter_sysctl(). @count, @pos and @nbuf are pointers that
 * the filter may update.
 */
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
buf, count, pos, nbuf, \
BPF_CGROUP_SYSCTL); \
__ret; \
})
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
......@@ -321,6 +341,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
/* Stub variant: ignores its arguments and always evaluates to 0, i.e. the
 * sysctl access is allowed (presumably the !CONFIG_CGROUP_BPF build - confirm
 * against the enclosing #ifdef).
 */
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; })
#define for_each_cgroup_storage_type(stype) for (; false; )
......
......@@ -202,6 +202,8 @@ enum bpf_arg_type {
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
ARG_PTR_TO_INT, /* pointer to int */
ARG_PTR_TO_LONG, /* pointer to long */
};
/* type of values returned from helper functions */
......@@ -987,6 +989,8 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
extern const struct bpf_func_proto bpf_spin_lock_proto;
extern const struct bpf_func_proto bpf_spin_unlock_proto;
extern const struct bpf_func_proto bpf_get_local_storage_proto;
extern const struct bpf_func_proto bpf_strtol_proto;
extern const struct bpf_func_proto bpf_strtoul_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
......
......@@ -28,6 +28,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
#endif
#ifdef CONFIG_BPF_LIRC_MODE2
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
......
......@@ -33,6 +33,8 @@ struct bpf_prog_aux;
struct xdp_rxq_info;
struct xdp_buff;
struct sock_reuseport;
struct ctl_table;
struct ctl_table_header;
/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
......@@ -1177,4 +1179,18 @@ struct bpf_sock_ops_kern {
*/
};
/* Kernel-side context handed to cgroup sysctl BPF programs. Accesses that a
 * program makes to struct bpf_sysctl fields are rewritten into accesses to
 * this structure.
 */
struct bpf_sysctl_kern {
/* Header and table entry of the sysctl being accessed. */
struct ctl_table_header *head;
struct ctl_table *table;
/* Current value as produced by the sysctl's read handler, and its length;
 * the length is 0 when the value could not be obtained.
 */
void *cur_val;
size_t cur_len;
/* Value being written and its length; the length is 0 when no value is
 * available.
 */
void *new_val;
size_t new_len;
/* Set to 1 once the program has replaced the new value. */
int new_updated;
/* Sysctl is being read (= 0) or written (= 1). */
int write;
/* File position of the access; programs read and write it indirectly. */
loff_t *ppos;
/* Temporary "register" for indirect stores to ppos. */
u64 tmp_reg;
};
#endif /* __LINUX_FILTER_H__ */
......@@ -167,6 +167,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
BPF_PROG_TYPE_CGROUP_SYSCTL,
};
enum bpf_attach_type {
......@@ -188,6 +189,7 @@ enum bpf_attach_type {
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
BPF_FLOW_DISSECTOR,
BPF_CGROUP_SYSCTL,
__MAX_BPF_ATTACH_TYPE
};
......@@ -2504,6 +2506,122 @@ union bpf_attr {
* Return
* 0 if iph and th are a valid SYN cookie ACK, or a negative error
* otherwise.
*
* int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
* Description
* Get name of sysctl in /proc/sys/ and copy it into provided by
* program buffer *buf* of size *buf_len*.
*
* The buffer is always NUL terminated, unless it's zero-sized.
*
* If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
* copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
* only (e.g. "tcp_mem").
* Return
* Number of characters copied (not including the trailing NUL).
*
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
* truncated name in this case).
*
* int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get current value of sysctl as it is presented in /proc/sys
* (incl. newline, etc), and copy it as a string into provided
* by program buffer *buf* of size *buf_len*.
*
* The whole value is copied, no matter what file position user
* space issued e.g. sys_read at.
*
* The buffer is always NUL terminated, unless it's zero-sized.
* Return
* Number of characters copied (not including the trailing NUL).
*
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
* truncated value in this case).
*
* **-EINVAL** if current value was unavailable, e.g. because
* sysctl is uninitialized and read returns -EIO for it.
*
* int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get new value being written by user space to sysctl (before
* the actual write happens) and copy it as a string into
* provided by program buffer *buf* of size *buf_len*.
*
* User space may write new value at file position > 0.
*
* The buffer is always NUL terminated, unless it's zero-sized.
* Return
* Number of characters copied (not including the trailing NUL).
*
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
* truncated value in this case).
*
* **-EINVAL** if sysctl is being read.
*
* int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
* Description
* Override new value being written by user space to sysctl with
* value provided by program in buffer *buf* of size *buf_len*.
*
* *buf* should contain a string in same form as provided by user
* space on sysctl write.
*
* User space may write new value at file position > 0. To override
* the whole sysctl value file position should be set to zero.
* Return
* 0 on success.
*
* **-E2BIG** if the *buf_len* is too big.
*
* **-EINVAL** if sysctl is being read.
*
* int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
* Description
* Convert the initial part of the string from buffer *buf* of
* size *buf_len* to a long integer according to the given base
* and save the result in *res*.
*
* The string may begin with an arbitrary amount of white space
* (as determined by isspace(3)) followed by a single optional '-'
* sign.
*
* Five least significant bits of *flags* encode base, other bits
* are currently unused.
*
* Base must be either 8, 10, 16 or 0 to detect it automatically
* similar to user space strtol(3).
* Return
* Number of characters consumed on success. Must be positive but
* no more than buf_len.
*
* **-EINVAL** if no valid digits were found or unsupported base
* was provided.
*
* **-ERANGE** if resulting value was out of range.
*
* int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
* Description
* Convert the initial part of the string from buffer *buf* of
* size *buf_len* to an unsigned long integer according to the
* given base and save the result in *res*.
*
* The string may begin with an arbitrary amount of white space
* (as determined by isspace(3)).
*
* Five least significant bits of *flags* encode base, other bits
* are currently unused.
*
* Base must be either 8, 10, 16 or 0 to detect it automatically
* similar to user space strtoul(3).
* Return
* Number of characters consumed on success. Must be positive but
* no more than buf_len.
*
* **-EINVAL** if no valid digits were found or unsupported base
* was provided.
*
* **-ERANGE** if resulting value was out of range.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -2606,7 +2724,13 @@ union bpf_attr {
FN(skb_ecn_set_ce), \
FN(get_listener_sock), \
FN(skc_lookup_tcp), \
FN(tcp_check_syncookie),
FN(tcp_check_syncookie), \
FN(sysctl_get_name), \
FN(sysctl_get_current_value), \
FN(sysctl_get_new_value), \
FN(sysctl_set_new_value), \
FN(strtol), \
FN(strtoul),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......@@ -2679,6 +2803,9 @@ enum bpf_func_id {
BPF_ADJ_ROOM_ENCAP_L2_MASK) \
<< BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
/* BPF_FUNC_sysctl_get_name flags. */
#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
......@@ -3308,4 +3435,14 @@ struct bpf_line_info {
struct bpf_spin_lock {
__u32 val;
};
/* Context of BPF_PROG_TYPE_CGROUP_SYSCTL programs, as seen by the program. */
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
*/
__u32 file_pos; /* Sysctl file position to read from, write to.
* Allows 1,2,4-byte read and 4-byte write.
*/
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -11,7 +11,10 @@
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>
......@@ -701,7 +704,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
......@@ -725,6 +728,12 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
/* Helper lookup for device cgroup programs: they get exactly the helper set
 * shared by cgroup program types, nothing on top of it.
 */
static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *proto;

	proto = cgroup_base_func_proto(func_id, prog);

	return proto;
}
static bool cgroup_dev_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
......@@ -762,3 +771,356 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
.get_func_proto = cgroup_dev_func_proto,
.is_valid_access = cgroup_dev_is_valid_access,
};
/**
 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
 *
 * @head: sysctl table header
 * @table: sysctl table
 * @write: sysctl is being read (= 0) or written (= 1)
 * @buf: pointer to buffer passed by user space
 * @pcount: value-result argument: value is size of buffer pointed to by @buf,
 *	result is size of @new_buf if program set new value, initial value
 *	otherwise
 * @ppos: value-result argument: value is position at which read from or write
 *	to sysctl is happening, result is new position if program overrode it,
 *	initial value otherwise
 * @new_buf: pointer to pointer to new buffer that will be allocated if program
 *	overrides new value provided by user space on sysctl write
 *	NOTE: it is the caller's responsibility to free *new_buf if it was set
 * @type: type of program to be executed
 *
 * Program is run when sysctl is being accessed, either read or written, and
 * can allow or deny such access.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution. In all other cases 0 is returned.
 */
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
				   struct ctl_table *table, int write,
				   void __user *buf, size_t *pcount,
				   loff_t *ppos, void **new_buf,
				   enum bpf_attach_type type)
{
	struct bpf_sysctl_kern ctx = {
		.head = head,
		.table = table,
		.write = write,
		.ppos = ppos,
		.cur_val = NULL,
		.cur_len = PAGE_SIZE,
		.new_val = NULL,
		.new_len = 0,
		.new_updated = 0,
	};
	struct cgroup *cgrp;
	int ret;

	/* Snapshot the current value by invoking the sysctl's own read
	 * handler from position 0 into a kernel buffer.
	 */
	ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
	if (ctx.cur_val) {
		mm_segment_t old_fs;
		loff_t pos = 0;

		/* The handler expects a user pointer; lift the address limit
		 * so it accepts the kernel buffer.
		 */
		old_fs = get_fs();
		set_fs(KERNEL_DS);
		if (table->proc_handler(table, 0, (void __user *)ctx.cur_val,
					&ctx.cur_len, &pos)) {
			/* Let BPF program decide how to proceed. */
			ctx.cur_len = 0;
		}
		set_fs(old_fs);
	} else {
		/* Let BPF program decide how to proceed. */
		ctx.cur_len = 0;
	}

	if (write && buf && *pcount) {
		/* BPF program should be able to override new value with a
		 * buffer bigger than provided by user.
		 */
		ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
		/* PAGE_SIZE is unsigned long while *pcount is size_t; these
		 * are distinct types on some 32-bit architectures, so the
		 * type-checked min() cannot be used here.
		 */
		ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
		if (!ctx.new_val ||
		    copy_from_user(ctx.new_val, buf, ctx.new_len))
			/* Let BPF program decide how to proceed. */
			ctx.new_len = 0;
	}

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
	rcu_read_unlock();

	kfree(ctx.cur_val);

	if (ret == 1 && ctx.new_updated) {
		/* Ownership of the override buffer moves to the caller. */
		*new_buf = ctx.new_val;
		*pcount = ctx.new_len;
	} else {
		kfree(ctx.new_val);
	}

	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
/* Recursively copy the path of @dir (outermost ancestor first) into *bufp,
 * following every non-empty component with a '/'.
 *
 * *bufp and *lenp are advanced past whatever was written, so the caller can
 * keep appending. Returns the number of characters written so far for this
 * path, or the negative error from strscpy() when the buffer is too small.
 */
static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
			      size_t *lenp)
{
	ssize_t parent_len = 0;
	ssize_t name_len;
	ssize_t sep_len;

	if (dir->header.parent) {
		parent_len = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
		if (parent_len < 0)
			return parent_len;
	}

	name_len = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
	if (name_len < 0)
		return name_len;
	*bufp += name_len;
	*lenp -= name_len;

	/* Nothing written yet: do not emit a leading slash. */
	if (parent_len + name_len == 0)
		return 0;

	sep_len = strscpy(*bufp, "/", *lenp);
	if (sep_len < 0)
		return sep_len;
	*bufp += sep_len;
	*lenp -= sep_len;

	return parent_len + name_len + sep_len;
}
/* Helper: copy the sysctl name into @buf.
 *
 * Without BPF_F_SYSCTL_BASE_NAME the directory path is written first and the
 * table entry's procname is appended to it; with the flag only the procname
 * is copied. Returns the total number of characters written, -EINVAL for a
 * NULL @buf or a missing table header, or the negative strscpy() error when
 * the name does not fit.
 */
BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len, u64, flags)
{
	ssize_t dir_len = 0;
	ssize_t leaf_len;

	if (!buf)
		return -EINVAL;

	if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
		if (!ctx->head)
			return -EINVAL;

		/* Advances buf/buf_len past the directory prefix. */
		dir_len = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
		if (dir_len < 0)
			return dir_len;
	}

	leaf_len = strscpy(buf, ctx->table->procname, buf_len);
	if (leaf_len < 0)
		return leaf_len;

	return dir_len + leaf_len;
}

/* Verifier-facing signature of bpf_sysctl_get_name(). */
static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
	.func		= bpf_sysctl_get_name,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_ANYTHING,
};
/* Copy a sysctl value of @src_len bytes into the @dst_len byte buffer @dst.
 *
 * The destination is always fully written: the unused tail is zero-filled
 * when the value fits, and the last byte is forced to NUL when it does not.
 *
 * Returns @src_len when the value plus a trailing NUL fits, -E2BIG when
 * @dst is zero-sized or the value had to be truncated, and -EINVAL when
 * @dst is NULL or there is no source value (in the latter case @dst is
 * zeroed).
 */
static int copy_sysctl_value(char *dst, size_t dst_len, const char *src,
			     size_t src_len)
{
	if (!dst)
		return -EINVAL;

	if (!dst_len)
		return -E2BIG;

	if (!src || !src_len) {
		memset(dst, 0, dst_len);
		return -EINVAL;
	}

	if (src_len < dst_len) {
		/* Whole value fits with room for at least one NUL. */
		memcpy(dst, src, src_len);
		memset(dst + src_len, '\0', dst_len - src_len);
		return src_len;
	}

	/* Truncate, keeping the result NUL terminated. */
	memcpy(dst, src, dst_len);
	dst[dst_len - 1] = '\0';

	return -E2BIG;
}
/* Helper: copy the snapshot of the sysctl's current value into @buf.
 * Return values are those of copy_sysctl_value().
 */
BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
	   char *, buf, size_t, buf_len)
{
	char *cur = ctx->cur_val;
	size_t cur_len = ctx->cur_len;

	return copy_sysctl_value(buf, buf_len, cur, cur_len);
}

/* Verifier-facing signature of bpf_sysctl_get_current_value(). */
static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
	.func		= bpf_sysctl_get_current_value,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};
/* Helper: copy the value being written to the sysctl into @buf.
 *
 * On a read access there is no new value: a non-empty @buf is zeroed and
 * -EINVAL is returned. Otherwise the result is that of copy_sysctl_value().
 */
BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len)
{
	if (ctx->write)
		return copy_sysctl_value(buf, buf_len, ctx->new_val,
					 ctx->new_len);

	if (buf && buf_len)
		memset(buf, '\0', buf_len);

	return -EINVAL;
}

/* Verifier-facing signature of bpf_sysctl_get_new_value(). */
static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
	.func		= bpf_sysctl_get_new_value,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};
/* Helper: replace the value being written to the sysctl with @buf.
 *
 * Returns 0 on success, -EINVAL when the access is not a write, no new value
 * is held in the context, or @buf is NULL/empty, and -E2BIG when @buf_len is
 * PAGE_SIZE or more.
 */
BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
	   const char *, buf, size_t, buf_len)
{
	/* There must be a pending write with a captured value to override. */
	if (!ctx->write || !ctx->new_val || !ctx->new_len)
		return -EINVAL;

	if (!buf || !buf_len)
		return -EINVAL;

	if (buf_len >= PAGE_SIZE)
		return -E2BIG;

	memcpy(ctx->new_val, buf, buf_len);
	ctx->new_len = buf_len;
	/* Tell the filter entry point to hand the buffer to its caller. */
	ctx->new_updated = 1;

	return 0;
}

/* Verifier-facing signature of bpf_sysctl_set_new_value(). */
static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
	.func		= bpf_sysctl_set_new_value,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};
/* Helper lookup for cgroup sysctl programs: the string-to-integer and sysctl
 * helpers are resolved here, every other id is delegated to
 * cgroup_base_func_proto().
 */
static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *proto;

	switch (func_id) {
	case BPF_FUNC_strtol:
		proto = &bpf_strtol_proto;
		break;
	case BPF_FUNC_strtoul:
		proto = &bpf_strtoul_proto;
		break;
	case BPF_FUNC_sysctl_get_name:
		proto = &bpf_sysctl_get_name_proto;
		break;
	case BPF_FUNC_sysctl_get_current_value:
		proto = &bpf_sysctl_get_current_value_proto;
		break;
	case BPF_FUNC_sysctl_get_new_value:
		proto = &bpf_sysctl_get_new_value_proto;
		break;
	case BPF_FUNC_sysctl_set_new_value:
		proto = &bpf_sysctl_set_new_value_proto;
		break;
	default:
		proto = cgroup_base_func_proto(func_id, prog);
		break;
	}

	return proto;
}
/* Decide whether a program may access struct bpf_sysctl at @off with @size.
 *
 * The access must lie inside the structure and be aligned to its own size.
 * 'write' is read-only; 'file_pos' may be read, or written with a full
 * 4-byte store. Permitted reads record the 4-byte field size in @info and
 * are then subject to the narrow-access check.
 */
static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
		return false;

	switch (off) {
	case offsetof(struct bpf_sysctl, write):
		if (type != BPF_READ)
			return false;
		break;
	case offsetof(struct bpf_sysctl, file_pos):
		/* Stores must cover the whole field. */
		if (type != BPF_READ)
			return size == size_default;
		break;
	default:
		return false;
	}

	/* Only reads of a known field get here. */
	bpf_ctx_record_field_size(info, size_default);

	return bpf_ctx_narrow_access_ok(off, size, size_default);
}
/* Rewrite one program access to struct bpf_sysctl (instruction @si) into
 * instructions operating on struct bpf_sysctl_kern.
 *
 * The replacement instructions are written to @insn_buf and their count is
 * returned; offsets other than 'write' and 'file_pos' emit nothing.
 * *target_size is set to the size of the field actually accessed.
 */
static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
switch (si->off) {
case offsetof(struct bpf_sysctl, write):
/* Direct field: a single load from bpf_sysctl_kern.write. */
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sysctl_kern, write,
FIELD_SIZEOF(struct bpf_sysctl_kern,
write),
target_size));
break;
case offsetof(struct bpf_sysctl, file_pos):
/* ppos is a pointer so it should be accessed via indirect
* loads and stores. Also for stores additional temporary
* register is used since neither src_reg nor dst_reg can be
* overridden.
*/
if (type == BPF_WRITE) {
/* Pick a scratch register, starting at R9 and stepping down,
 * that is neither the source nor the destination register.
 */
int treg = BPF_REG_9;
if (si->src_reg == treg || si->dst_reg == treg)
--treg;
if (si->src_reg == treg || si->dst_reg == treg)
--treg;
/* Save the scratch register in ctx->tmp_reg. */
*insn++ = BPF_STX_MEM(
BPF_DW, si->dst_reg, treg,
offsetof(struct bpf_sysctl_kern, tmp_reg));
/* scratch = ctx->ppos */
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
treg, si->dst_reg,
offsetof(struct bpf_sysctl_kern, ppos));
/* Store the 32-bit source value through the ppos pointer. */
*insn++ = BPF_STX_MEM(
BPF_SIZEOF(u32), treg, si->src_reg, 0);
/* Restore the scratch register from ctx->tmp_reg. */
*insn++ = BPF_LDX_MEM(
BPF_DW, treg, si->dst_reg,
offsetof(struct bpf_sysctl_kern, tmp_reg));
} else {
/* dst = ctx->ppos, then dst = value at that pointer, loaded with
 * the size of the original instruction.
 */
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
si->dst_reg, si->src_reg,
offsetof(struct bpf_sysctl_kern, ppos));
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0);
}
*target_size = sizeof(u32);
break;
}
return insn - insn_buf;
}
/* Verifier callbacks for cgroup sysctl programs: helper lookup, context
 * access validation and context access rewriting.
 */
const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
.get_func_proto = sysctl_func_proto,
.is_valid_access = sysctl_is_valid_access,
.convert_ctx_access = sysctl_convert_ctx_access,
};
/* No program-type specific operations are set for cgroup sysctl programs. */
const struct bpf_prog_ops cg_sysctl_prog_ops = {
};
......@@ -18,6 +18,9 @@
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
#include <linux/ctype.h>
#include "../../lib/kstrtox.h"
/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
......@@ -363,4 +366,132 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
.arg2_type = ARG_ANYTHING,
};
#endif
/* Low five bits of the helper flags carry the numeric base. */
#define BPF_STRTOX_BASE_MASK 0x1F

/* Parse an unsigned integer from the (not necessarily NUL terminated) buffer
 * @buf of @buf_len bytes.
 *
 * Leading white space and a single optional '-' are skipped; the presence of
 * the sign is reported through @is_negative and the magnitude through @res.
 * At most 63 characters following the sign are examined.
 *
 * Returns the number of characters consumed, -EINVAL for bad arguments, an
 * unsupported base, unknown flag bits or a missing number, and -ERANGE when
 * the value overflows.
 */
static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
			  unsigned long long *res, bool *is_negative)
{
	unsigned int base = flags & BPF_STRTOX_BASE_MASK;
	const char *pos, *end, *digits;
	unsigned int consumed;
	size_t remaining;
	size_t ndigits;
	char tmp[64];

	if (!buf || !buf_len || !res || !is_negative)
		return -EINVAL;

	switch (base) {
	case 0:
	case 8:
	case 10:
	case 16:
		break;
	default:
		return -EINVAL;
	}

	if (flags & ~BPF_STRTOX_BASE_MASK)
		return -EINVAL;

	end = buf + buf_len;
	for (pos = buf; pos < end && isspace(*pos); pos++)
		;

	*is_negative = false;
	if (pos < end && *pos == '-') {
		*is_negative = true;
		pos++;
	}

	consumed = pos - buf;
	remaining = buf_len - consumed;
	if (remaining == 0)
		return -EINVAL;

	/* The parser needs a NUL terminated string: work on a bounded copy. */
	if (remaining > sizeof(tmp) - 1)
		remaining = sizeof(tmp) - 1;
	memcpy(tmp, pos, remaining);
	tmp[remaining] = '\0';

	digits = _parse_integer_fixup_radix(tmp, &base);
	ndigits = _parse_integer(digits, base, res);

	if (ndigits & KSTRTOX_OVERFLOW)
		return -ERANGE;

	if (ndigits == 0)
		return -EINVAL;

	/* Count any radix prefix that was skipped as well as the digits. */
	consumed += (digits + ndigits) - tmp;

	return consumed;
}
/* Signed counterpart of __bpf_strtoull(): parse the magnitude, then apply the
 * sign while rejecting values that do not fit into long long.
 *
 * Returns the number of characters consumed or a negative error; *res is
 * written only on success.
 */
static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
			 long long *res)
{
	unsigned long long magnitude;
	bool negative;
	int consumed;

	consumed = __bpf_strtoull(buf, buf_len, flags, &magnitude, &negative);
	if (consumed < 0)
		return consumed;

	if (!negative) {
		/* Sign bit set: larger than the biggest long long. */
		if ((long long)magnitude < 0)
			return -ERANGE;
		*res = magnitude;
		return consumed;
	}

	/* A negated magnitude that comes out positive did not fit. */
	if ((long long)-magnitude > 0)
		return -ERANGE;
	*res = -magnitude;

	return consumed;
}
/* Helper: parse a signed integer from @buf and store it in *res.
 *
 * Returns the number of characters consumed, the error of __bpf_strtoll(),
 * or -ERANGE when the value does not fit into long. *res is left untouched
 * on failure.
 */
BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
	   long *, res)
{
	long long parsed;
	int consumed;

	consumed = __bpf_strtoll(buf, buf_len, flags, &parsed);
	if (consumed < 0)
		return consumed;

	/* Reject values that would be truncated by the narrowing to long. */
	if ((long)parsed != parsed)
		return -ERANGE;

	*res = parsed;

	return consumed;
}

/* Verifier-facing signature of bpf_strtol(). */
const struct bpf_func_proto bpf_strtol_proto = {
	.func		= bpf_strtol,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};
/* Helper: parse an unsigned integer from @buf and store it in *res.
 *
 * Returns the number of characters consumed, the error of __bpf_strtoull(),
 * -EINVAL when the number carries a '-' sign, or -ERANGE when the value does
 * not fit into unsigned long. *res is left untouched on failure.
 */
BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
	   unsigned long *, res)
{
	unsigned long long parsed;
	bool negative;
	int consumed;

	consumed = __bpf_strtoull(buf, buf_len, flags, &parsed, &negative);
	if (consumed < 0)
		return consumed;

	if (negative)
		return -EINVAL;

	/* Reject values that would be truncated by the narrowing. */
	if ((unsigned long)parsed != parsed)
		return -ERANGE;

	*res = parsed;

	return consumed;
}

/* Verifier-facing signature of bpf_strtoul(). */
const struct bpf_func_proto bpf_strtoul_proto = {
	.func		= bpf_strtoul,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};
#endif
......@@ -1888,6 +1888,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_FLOW_DISSECTOR:
ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
break;
case BPF_CGROUP_SYSCTL:
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
break;
default:
return -EINVAL;
}
......@@ -1966,6 +1969,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
return lirc_prog_detach(attr);
case BPF_FLOW_DISSECTOR:
return skb_flow_dissector_bpf_prog_detach(attr);
case BPF_CGROUP_SYSCTL:
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
break;
default:
return -EINVAL;
}
......@@ -1999,6 +2005,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_UDP6_SENDMSG:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
case BPF_CGROUP_SYSCTL:
break;
case BPF_LIRC_MODE2:
return lirc_prog_query(attr, uattr);
......
......@@ -2462,6 +2462,22 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type)
type == ARG_CONST_SIZE_OR_ZERO;
}
static bool arg_type_is_int_ptr(enum bpf_arg_type type)
{
return type == ARG_PTR_TO_INT ||
type == ARG_PTR_TO_LONG;
}
/* Size in bytes of the integer behind an integer-pointer argument type:
 * 4 for ARG_PTR_TO_INT, 8 for ARG_PTR_TO_LONG, -EINVAL for any other type.
 */
static int int_ptr_type_to_size(enum bpf_arg_type type)
{
	switch (type) {
	case ARG_PTR_TO_INT:
		return sizeof(u32);
	case ARG_PTR_TO_LONG:
		return sizeof(u64);
	default:
		return -EINVAL;
	}
}
static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
enum bpf_arg_type arg_type,
struct bpf_call_arg_meta *meta)
......@@ -2554,6 +2570,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
type != expected_type)
goto err_type;
meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
} else if (arg_type_is_int_ptr(arg_type)) {
expected_type = PTR_TO_STACK;
if (!type_is_pkt_pointer(type) &&
type != PTR_TO_MAP_VALUE &&
type != expected_type)
goto err_type;
} else {
verbose(env, "unsupported arg_type %d\n", arg_type);
return -EFAULT;
......@@ -2635,6 +2657,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
err = check_helper_mem_access(env, regno - 1,
reg->umax_value,
zero_size_allowed, meta);
} else if (arg_type_is_int_ptr(arg_type)) {
int size = int_ptr_type_to_size(arg_type);
err = check_helper_mem_access(env, regno, size, false, meta);
if (err)
return err;
err = check_ptr_alignment(env, reg, 0, size, true);
}
return err;
......@@ -5267,6 +5296,7 @@ static int check_return_code(struct bpf_verifier_env *env)
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
break;
default:
return 0;
......
......@@ -167,6 +167,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
BPF_PROG_TYPE_CGROUP_SYSCTL,
};
enum bpf_attach_type {
......@@ -188,6 +189,7 @@ enum bpf_attach_type {
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
BPF_FLOW_DISSECTOR,
BPF_CGROUP_SYSCTL,
__MAX_BPF_ATTACH_TYPE
};
......@@ -2504,6 +2506,122 @@ union bpf_attr {
* Return
* 0 if iph and th are a valid SYN cookie ACK, or a negative error
* otherwise.
*
* int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
* Description
* Get name of sysctl in /proc/sys/ and copy it into provided by
* program buffer *buf* of size *buf_len*.
*
* The buffer is always NUL terminated, unless it's zero-sized.
*
* If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
* copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
* only (e.g. "tcp_mem").
* Return
* Number of characters copied (not including the trailing NUL).
*
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
* truncated name in this case).
*
* int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get current value of sysctl as it is presented in /proc/sys
* (incl. newline, etc), and copy it as a string into provided
* by program buffer *buf* of size *buf_len*.
*
* The whole value is copied, no matter what file position user
* space issued e.g. sys_read at.
*
* The buffer is always NUL terminated, unless it's zero-sized.
* Return
* Number of characters copied (not including the trailing NUL).
*
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
* truncated value in this case).
*
* **-EINVAL** if current value was unavailable, e.g. because
* sysctl is uninitialized and read returns -EIO for it.
*
* int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get new value being written by user space to sysctl (before
* the actual write happens) and copy it as a string into
* provided by program buffer *buf* of size *buf_len*.
*
* User space may write new value at file position > 0.
*
* The buffer is always NUL terminated, unless it's zero-sized.
* Return
* Number of characters copied (not including the trailing NUL).
*
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
* truncated value in this case).
*
* **-EINVAL** if sysctl is being read.
*
* int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
* Description
* Override new value being written by user space to sysctl with
* value provided by program in buffer *buf* of size *buf_len*.
*
* *buf* should contain a string in same form as provided by user
* space on sysctl write.
*
* User space may write new value at file position > 0. To override
* the whole sysctl value file position should be set to zero.
* Return
* 0 on success.
*
* **-E2BIG** if the *buf_len* is too big.
*
* **-EINVAL** if sysctl is being read.
*
* int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
* Description
* Convert the initial part of the string from buffer *buf* of
* size *buf_len* to a long integer according to the given base
* and save the result in *res*.
*
* The string may begin with an arbitrary amount of white space
* (as determined by isspace(3)) followed by a single optional '-'
* sign.
*
* Five least significant bits of *flags* encode base, other bits
* are currently unused.
*
* Base must be either 8, 10, 16 or 0 to detect it automatically
* similar to user space strtol(3).
* Return
* Number of characters consumed on success. Must be positive but
* no more than buf_len.
*
* **-EINVAL** if no valid digits were found or unsupported base
* was provided.
*
* **-ERANGE** if resulting value was out of range.
*
* int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
* Description
* Convert the initial part of the string from buffer *buf* of
* size *buf_len* to an unsigned long integer according to the
* given base and save the result in *res*.
*
* The string may begin with an arbitrary amount of white space
* (as determined by isspace(3)).
*
* Five least significant bits of *flags* encode base, other bits
* are currently unused.
*
* Base must be either 8, 10, 16 or 0 to detect it automatically
* similar to user space strtoul(3).
* Return
* Number of characters consumed on success. Must be positive but
* no more than buf_len.
*
* **-EINVAL** if no valid digits were found or unsupported base
* was provided.
*
* **-ERANGE** if resulting value was out of range.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -2606,7 +2724,13 @@ union bpf_attr {
FN(skb_ecn_set_ce), \
FN(get_listener_sock), \
FN(skc_lookup_tcp), \
FN(tcp_check_syncookie),
FN(tcp_check_syncookie), \
FN(sysctl_get_name), \
FN(sysctl_get_current_value), \
FN(sysctl_get_new_value), \
FN(sysctl_set_new_value), \
FN(strtol), \
FN(strtoul),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......@@ -2679,6 +2803,9 @@ enum bpf_func_id {
BPF_ADJ_ROOM_ENCAP_L2_MASK) \
<< BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
/* BPF_FUNC_sysctl_get_name flags. */
#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
......@@ -3308,4 +3435,14 @@ struct bpf_line_info {
struct bpf_spin_lock {
__u32 val;
};
/* Context handed to BPF_PROG_TYPE_CGROUP_SYSCTL programs. */
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
*/
__u32 file_pos; /* Sysctl file position to read from, write to.
* Allows 1,2,4-byte read and 4-byte write.
*/
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -2064,6 +2064,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
case BPF_PROG_TYPE_TRACEPOINT:
case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
return false;
case BPF_PROG_TYPE_KPROBE:
default:
......@@ -3004,6 +3005,8 @@ static const struct {
BPF_CGROUP_UDP4_SENDMSG),
BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_CGROUP_UDP6_SENDMSG),
BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL,
BPF_CGROUP_SYSCTL),
};
#undef BPF_PROG_SEC_IMPL
......
......@@ -97,6 +97,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_LIRC_MODE2:
case BPF_PROG_TYPE_SK_REUSEPORT:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
default:
break;
}
......
......@@ -23,7 +23,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
test_netcnt test_tcpnotify_user test_sock_fields
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl
BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES)
......@@ -93,6 +93,7 @@ $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
.PHONY: force
......
......@@ -192,6 +192,25 @@ static int (*bpf_skb_ecn_set_ce)(void *ctx) =
static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk,
void *ip, int ip_len, void *tcp, int tcp_len) =
(void *) BPF_FUNC_tcp_check_syncookie;
/* Copy the sysctl name into buf as a NUL-terminated string. With flags == 0
 * the full name is returned (e.g. "net/ipv4/tcp_mem"); with
 * BPF_F_SYSCTL_BASE_NAME only the last component (e.g. "tcp_mem").
 * Returns the name length, or -E2BIG when buf_len is too small, in which
 * case buf holds a truncated, NUL-terminated copy.
 */
static int (*bpf_sysctl_get_name)(void *ctx, char *buf,
unsigned long long buf_len,
unsigned long long flags) =
(void *) BPF_FUNC_sysctl_get_name;
/* Copy the current sysctl value, as a string, into buf.
 * Returns the value length, -E2BIG when it had to be truncated, or
 * -EINVAL when the current value could not be obtained.
 */
static int (*bpf_sysctl_get_current_value)(void *ctx, char *buf,
unsigned long long buf_len) =
(void *) BPF_FUNC_sysctl_get_current_value;
/* Copy the value being written by user space into buf.
 * Returns the value length, -E2BIG on truncation, or -EINVAL when the
 * sysctl is being read rather than written.
 */
static int (*bpf_sysctl_get_new_value)(void *ctx, char *buf,
unsigned long long buf_len) =
(void *) BPF_FUNC_sysctl_get_new_value;
/* Replace the value being written by user space with the buf_len bytes
 * at buf. Returns 0 on success, -E2BIG if buf_len is too big, or -EINVAL
 * when the sysctl is being read.
 */
static int (*bpf_sysctl_set_new_value)(void *ctx, const char *buf,
unsigned long long buf_len) =
(void *) BPF_FUNC_sysctl_set_new_value;
/* Parse a signed integer from the start of buf (leading white space and
 * one optional '-' allowed). The five low bits of flags select the base:
 * 8, 10, 16, or 0 for auto-detection. Returns the number of characters
 * consumed, -EINVAL for no digits or a bad base, -ERANGE on overflow.
 */
static int (*bpf_strtol)(const char *buf, unsigned long long buf_len,
unsigned long long flags, long *res) =
(void *) BPF_FUNC_strtol;
/* Unsigned counterpart of bpf_strtol: same base encoding in flags and the
 * same return convention, but no sign is accepted.
 */
static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len,
unsigned long long flags, unsigned long *res) =
(void *) BPF_FUNC_strtoul;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
......
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <stdint.h>
#include <string.h>
#include <linux/stddef.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
#include "bpf_util.h"
/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
#define MAX_ULONG_STR_LEN 0xF
/* Max supported length of sysctl value string (pow2). */
#define MAX_VALUE_STR_LEN 0x40
/* Tell whether the sysctl being accessed is "net/ipv4/tcp_mem".
 *
 * Fetches the full sysctl name into a zeroed local buffer and compares it,
 * terminating NUL included, against the expected name.
 *
 * Returns 1 on an exact match, 0 otherwise (including when the name helper
 * fails or reports a different length).
 */
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
	char expected[] = "net/ipv4/tcp_mem";
	char actual[64];
	unsigned char pos;
	int len;

	memset(actual, 0, sizeof(actual));
	len = bpf_sysctl_get_name(ctx, actual, sizeof(actual), 0);
	if (len < 0)
		return 0;
	if (len != sizeof(expected) - 1)
		return 0;

	/* Bound is sizeof(), not strlen(): the trailing NUL is checked too. */
#pragma clang loop unroll(full)
	for (pos = 0; pos < sizeof(expected); ++pos) {
		if (actual[pos] != expected[pos])
			return 0;
	}

	return 1;
}
/* cgroup/sysctl program exercising the name/value helpers and bpf_strtoul.
 *
 * Returns 1 only for a read of net/ipv4/tcp_mem whose three numbers parse
 * successfully and are strictly increasing; returns 0 for writes, for any
 * other sysctl, and on any helper or parse failure.
 */
SEC("cgroup/sysctl")
int sysctl_tcp_mem(struct bpf_sysctl *ctx)
{
	unsigned long limits[3] = {0, 0, 0};
	char text[MAX_VALUE_STR_LEN];
	unsigned char cursor = 0;
	unsigned char idx;
	int n;

	if (ctx->write || !is_tcp_mem(ctx))
		return 0;

	n = bpf_sysctl_get_current_value(ctx, text, MAX_VALUE_STR_LEN);
	if (n < 0)
		return 0;
	if (n >= MAX_VALUE_STR_LEN)
		return 0;

#pragma clang loop unroll(full)
	for (idx = 0; idx < ARRAY_SIZE(limits); ++idx) {
		n = bpf_strtoul(text + cursor, MAX_ULONG_STR_LEN, 0,
				&limits[idx]);
		if (n <= 0 || n > MAX_ULONG_STR_LEN)
			return 0;
		/* n is already within 1..MAX_ULONG_STR_LEN here; the mask
		 * restates that bound on the value added to the offset.
		 * NOTE(review): presumably needed so the verifier can bound
		 * the variable stack offset - confirm.
		 */
		cursor += n & MAX_ULONG_STR_LEN;
	}

	if (limits[0] >= limits[1])
		return 0;
	return limits[1] < limits[2];
}
char _license[] SEC("license") = "GPL";
......@@ -119,6 +119,11 @@ static struct sec_name_test tests[] = {
{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG},
{0, BPF_CGROUP_UDP6_SENDMSG},
},
{
"cgroup/sysctl",
{0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
{0, BPF_CGROUP_SYSCTL},
},
};
static int test_prog_type_by_name(const struct sec_name_test *test)
......
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <linux/filter.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
#define CG_PATH "/foo"
#define MAX_INSNS 512
#define FIXUP_SYSCTL_VALUE 0
char bpf_log_buf[BPF_LOG_BUF_SIZE];
/* Description of a single sysctl hook test case: the program to run, the
 * sysctl access to perform, and the outcome expected from it.
 * Field order matters: some entries in tests[] initialize descr positionally.
 */
struct sysctl_test {
/* Human-readable name of the test case. */
const char *descr;
/* Index into insns of the instruction that carries the
 * FIXUP_SYSCTL_VALUE placeholder (e.g. 6 or 2 in the tests that set it).
 * NOTE(review): the code that patches the instruction is outside this
 * view - confirm what value gets substituted.
 */
size_t fixup_value_insn;
/* Program given inline as raw BPF instructions. */
struct bpf_insn insns[MAX_INSNS];
/* NOTE(review): not set by any test visible here; presumably the path of
 * a BPF object file to load instead of insns - confirm.
 */
const char *prog_file;
/* Attach type used for the program, normally BPF_CGROUP_SYSCTL. */
enum bpf_attach_type attach_type;
/* Sysctl to access, written with '/' separators (e.g. "kernel/ostype"). */
const char *sysctl;
/* open(2)-style flags for the access: O_RDONLY or O_WRONLY in the tests. */
int open_flags;
/* Set on O_WRONLY tests; presumably the string written to the sysctl -
 * confirm against the test runner.
 */
const char *newval;
/* Presumably the content a read is expected to return (e.g. "nux\n" when
 * the program moves file_pos to 2) - confirm against the test runner.
 */
const char *oldval;
/* Expected outcome. Judging by the visible tests: LOAD_REJECT for programs
 * that must not load, ATTACH_REJECT for a bad attach type, OP_EPERM when
 * the program returns 0 (deny), SUCCESS when the access goes through.
 */
enum {
LOAD_REJECT,
ATTACH_REJECT,
OP_EPERM,
SUCCESS,
} result;
};
static struct sysctl_test tests[] = {
{
.descr = "sysctl wrong attach_type",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = 0,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.result = ATTACH_REJECT,
},
{
.descr = "sysctl:read allow all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl:read deny all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.result = OP_EPERM,
},
{
.descr = "ctx:write sysctl:read read ok",
.insns = {
/* If (write) */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, write)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_A(1),
/* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "ctx:write sysctl:write read ok",
.insns = {
/* If (write) */
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, write)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_A(1),
/* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/domainname",
.open_flags = O_WRONLY,
.newval = "(none)", /* same as default, should fail anyway */
.result = OP_EPERM,
},
{
.descr = "ctx:write sysctl:read write reject",
.insns = {
/* write = X */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
offsetof(struct bpf_sysctl, write)),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.result = LOAD_REJECT,
},
{
.descr = "ctx:file_pos sysctl:read read ok",
.insns = {
/* If (file_pos == X) */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, file_pos)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "ctx:file_pos sysctl:read read ok narrow",
.insns = {
/* If (file_pos == X) */
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, file_pos)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "ctx:file_pos sysctl:read write ok",
.insns = {
/* file_pos = X */
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
offsetof(struct bpf_sysctl, file_pos)),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.oldval = "nux\n",
.result = SUCCESS,
},
{
.descr = "sysctl_get_name sysctl_value:base ok",
.insns = {
/* sysctl_get_name arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_name arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 8),
/* sysctl_get_name arg4 (flags) */
BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),
/* sysctl_get_name(ctx, buf, buf_len, flags) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6),
/* buf == "tcp_mem\0") */
BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl_get_name sysctl_value:base E2BIG truncated",
.insns = {
/* sysctl_get_name arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_name arg3 (buf_len) too small */
BPF_MOV64_IMM(BPF_REG_3, 7),
/* sysctl_get_name arg4 (flags) */
BPF_MOV64_IMM(BPF_REG_4, BPF_F_SYSCTL_BASE_NAME),
/* sysctl_get_name(ctx, buf, buf_len, flags) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
/* buf[0:7] == "tcp_me\0") */
BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl_get_name sysctl:full ok",
.insns = {
/* sysctl_get_name arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_name arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 17),
/* sysctl_get_name arg4 (flags) */
BPF_MOV64_IMM(BPF_REG_4, 0),
/* sysctl_get_name(ctx, buf, buf_len, flags) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14),
/* buf[0:8] == "net/ipv4" && */
BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
/* buf[8:16] == "/tcp_mem" && */
BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
/* buf[16:24] == "\0") */
BPF_LD_IMM64(BPF_REG_8, 0x0ULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl_get_name sysctl:full E2BIG truncated",
.insns = {
/* sysctl_get_name arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_name arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 16),
/* sysctl_get_name arg4 (flags) */
BPF_MOV64_IMM(BPF_REG_4, 0),
/* sysctl_get_name(ctx, buf, buf_len, flags) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10),
/* buf[0:8] == "net/ipv4" && */
BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
/* buf[8:16] == "/tcp_me\0") */
BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl_get_name sysctl:full E2BIG truncated small",
.insns = {
/* sysctl_get_name arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_name arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 7),
/* sysctl_get_name arg4 (flags) */
BPF_MOV64_IMM(BPF_REG_4, 0),
/* sysctl_get_name(ctx, buf, buf_len, flags) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_name),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
/* buf[0:8] == "net/ip\0") */
BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl_get_current_value sysctl:read ok, gt",
.insns = {
/* sysctl_get_current_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_current_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 8),
/* sysctl_get_current_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
/* buf[0:6] == "Linux\n\0") */
BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl_get_current_value sysctl:read ok, eq",
.insns = {
/* sysctl_get_current_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 7),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_current_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 7),
/* sysctl_get_current_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6),
/* buf[0:6] == "Linux\n\0") */
BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl_get_current_value sysctl:read E2BIG truncated",
.insns = {
/* sysctl_get_current_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_H, BPF_REG_7, BPF_REG_0, 6),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_current_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 6),
/* sysctl_get_current_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6),
/* buf[0:6] == "Linux\0") */
BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl_get_current_value sysctl:read EINVAL",
.insns = {
/* sysctl_get_current_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_current_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 8),
/* sysctl_get_current_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 4),
/* buf[0:8] is NUL-filled) */
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_A(1),
/* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv6/conf/lo/stable_secret", /* -EIO */
.open_flags = O_RDONLY,
.result = OP_EPERM,
},
{
.descr = "sysctl_get_current_value sysctl:write ok",
.fixup_value_insn = 6,
.insns = {
/* sysctl_get_current_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_current_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 8),
/* sysctl_get_current_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_current_value),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 6),
/* buf[0:4] == expected) */
BPF_LD_IMM64(BPF_REG_8, FIXUP_SYSCTL_VALUE),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_A(1),
/* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_WRONLY,
.newval = "600", /* same as default, should fail anyway */
.result = OP_EPERM,
},
{
.descr = "sysctl_get_new_value sysctl:read EINVAL",
.insns = {
/* sysctl_get_new_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_new_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 8),
/* sysctl_get_new_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
/* if (ret == expected) */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl_get_new_value sysctl:write ok",
.insns = {
/* sysctl_get_new_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_new_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 4),
/* sysctl_get_new_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
/* buf[0:4] == "606\0") */
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x00363036, 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_A(1),
/* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_WRONLY,
.newval = "606",
.result = OP_EPERM,
},
{
.descr = "sysctl_get_new_value sysctl:write ok long",
.insns = {
/* sysctl_get_new_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_new_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 24),
/* sysctl_get_new_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14),
/* buf[0:8] == "3000000 " && */
BPF_LD_IMM64(BPF_REG_8, 0x2030303030303033ULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10),
/* buf[8:16] == "4000000 " && */
BPF_LD_IMM64(BPF_REG_8, 0x2030303030303034ULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6),
/* buf[16:24] == "6000000\0") */
BPF_LD_IMM64(BPF_REG_8, 0x0030303030303036ULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_A(1),
/* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_WRONLY,
.newval = "3000000 4000000 6000000",
.result = OP_EPERM,
},
{
.descr = "sysctl_get_new_value sysctl:write E2BIG",
.insns = {
/* sysctl_get_new_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_B, BPF_REG_7, BPF_REG_0, 3),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_get_new_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 3),
/* sysctl_get_new_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_get_new_value),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 4),
/* buf[0:3] == "60\0") */
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x003036, 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_A(1),
/* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_WRONLY,
.newval = "606",
.result = OP_EPERM,
},
{
.descr = "sysctl_set_new_value sysctl:read EINVAL",
.insns = {
/* sysctl_set_new_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_set_new_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 3),
/* sysctl_set_new_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value),
/* if (ret == expected) */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
.descr = "sysctl_set_new_value sysctl:write ok",
.fixup_value_insn = 2,
.insns = {
/* sysctl_set_new_value arg2 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, FIXUP_SYSCTL_VALUE),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
/* sysctl_set_new_value arg3 (buf_len) */
BPF_MOV64_IMM(BPF_REG_3, 3),
/* sysctl_set_new_value(ctx, buf, buf_len) */
BPF_EMIT_CALL(BPF_FUNC_sysctl_set_new_value),
/* if (ret == expected) */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_WRONLY,
.newval = "606",
.result = SUCCESS,
},
{
"bpf_strtoul one number string",
.insns = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* arg4 (res) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtoul),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
/* res == expected) */
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
"bpf_strtoul multi number string",
.insns = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* "600 602\0" */
BPF_LD_IMM64(BPF_REG_0, 0x0032303620303036ULL),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 8),
/* arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* arg4 (res) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtoul),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 18),
/* res == expected) */
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 600, 16),
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 8),
BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_0),
/* arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* arg4 (res) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtoul),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4),
/* res == expected) */
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 602, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
"bpf_strtoul buf_len = 0, reject",
.insns = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 0),
/* arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* arg4 (res) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtoul),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = LOAD_REJECT,
},
{
"bpf_strtoul supported base, ok",
.insns = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0x00373730),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 8),
/* arg4 (res) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtoul),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
/* res == expected) */
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 63, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
"bpf_strtoul unsupported base, EINVAL",
.insns = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 3),
/* arg4 (res) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtoul),
/* if (ret == expected) */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
{
"bpf_strtoul buf with spaces only, EINVAL",
.insns = {
/* arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_IMM(BPF_REG_0, 0x090a0c0d),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* arg4 (res) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtoul),
/* if (ret == expected) */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
/*
 * bpf_strtoul() on a buffer holding a negative number. The program returns 1
 * (allow) only when the helper returns -EINVAL, so the test passes when the
 * unsigned parser refuses the leading minus sign.
 */
{
"bpf_strtoul negative number, EINVAL",
.insns = {
/* arg1 (buf): 8-byte stack slot at R10 - 8 */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* bytes 0a 2d 36 00 in memory order (little-endian store assumed) */
BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* "\n-6\0" */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* arg4 (res): second slot at R10 - 16, written once so it is initialized */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtoul),
/* if (ret == expected) */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -EINVAL, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
/*
 * bpf_strtol() on the same negative input that bpf_strtoul() rejects. The
 * program returns 1 (allow) only when the helper returns 3 and the parsed
 * value stored through arg4 equals -6.
 */
{
"bpf_strtol negative number, ok",
.insns = {
/* arg1 (buf): 8-byte stack slot at R10 - 8 */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* bytes 0a 2d 36 00 in memory order (little-endian store assumed) */
BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* "\n-6\0" */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* arg3 (flags): 10 here, presumably selecting base 10 - confirm against the helper docs */
BPF_MOV64_IMM(BPF_REG_3, 10),
/* arg4 (res): second slot at R10 - 16, written once so it is initialized */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtol),
/* if (ret == expected && */
/* 3 is presumably the number of characters consumed ("\n-6"); +4 lands on the DENY mov */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 3, 4),
/* res == expected) */
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_9, -6, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
/*
 * bpf_strtol() on the text "0xfe" with flags 0. The program returns 1
 * (allow) only when the helper returns 4 and the value stored through arg4
 * equals 254.
 */
{
"bpf_strtol hex number, ok",
.insns = {
/* arg1 (buf): 8-byte stack slot at R10 - 8 */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* bytes 30 78 66 65 in memory order (little-endian store assumed) */
BPF_MOV64_IMM(BPF_REG_0, 0x65667830), /* "0xfe" */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* arg3 (flags): 0, so the "0x" prefix is presumably what selects hex - confirm */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* arg4 (res): second slot at R10 - 16, written once so it is initialized */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtol),
/* if (ret == expected && */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, 4),
/* res == expected) */
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 254, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
/*
 * bpf_strtol() on the 19-digit decimal text 9223372036854775807. The program
 * returns 1 (allow) only when the helper returns 19 and the value stored
 * through arg4 equals 0x7fffffffffffffff.
 */
{
"bpf_strtol max long",
.insns = {
/* arg1 (buf) 9223372036854775807 */
/* 24-byte buffer at R10 - 24, filled 8 bytes at a time (little-endian stores assumed) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL), /* "92233720" */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL), /* "36854775" */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
BPF_LD_IMM64(BPF_REG_0, 0x0000000000373038ULL), /* "807" plus NUL padding */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 19),
/* arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* arg4 (res): slot at R10 - 32, written once so it is initialized */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtol),
/* if (ret == expected && */
/* offset 6 because the BPF_LD_IMM64 below occupies two instruction slots */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 19, 6),
/* res == expected) */
BPF_LD_IMM64(BPF_REG_8, 0x7fffffffffffffffULL),
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0),
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
/*
 * bpf_strtol() on the decimal text 9223372036854775808, which differs from
 * the "max long" case only in its last digit. The program returns 1 (allow)
 * only when the helper returns -ERANGE.
 */
{
"bpf_strtol overflow, ERANGE",
.insns = {
/* arg1 (buf) 9223372036854775808 */
/* 24-byte buffer at R10 - 24, filled 8 bytes at a time (little-endian stores assumed) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24),
BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL), /* "92233720" */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL), /* "36854775" */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8),
BPF_LD_IMM64(BPF_REG_0, 0x0000000000383038ULL), /* "808" plus NUL padding */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 19),
/* arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* arg4 (res): slot at R10 - 32, written once so it is initialized */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
BPF_EMIT_CALL(BPF_FUNC_strtol),
/* if (ret == expected) */
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -ERANGE, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
/*
 * Program loaded from an object file rather than from inline instructions.
 * The harness writes .newval to net/ipv4/tcp_mem and expects the write to
 * fail with EPERM.
 */
{
"C prog: deny all writes",
.prog_file = "./test_sysctl_prog.o",
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_WRONLY,
.newval = "123 456 789",
.result = OP_EPERM,
},
/*
 * Same object file, but the harness reads a different sysctl
 * (net/ipv4/route/mtu_expires) and expects the read to fail with EPERM.
 */
{
"C prog: deny access by name",
.prog_file = "./test_sysctl_prog.o",
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
.result = OP_EPERM,
},
/*
 * Same object file; a read-only access to net/ipv4/tcp_mem is expected to
 * succeed.
 */
{
"C prog: read tcp_mem",
.prog_file = "./test_sysctl_prog.o",
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_RDONLY,
.result = SUCCESS,
},
};
/*
 * Work out how many instructions of a MAX_INSNS-sized array are in use.
 *
 * Scans backwards from the last slot and stops at the first instruction whose
 * code or imm field is non-zero (slot 0 is never inspected). Returns the index
 * of that instruction plus one, so the result is always at least 1.
 */
static size_t probe_prog_length(const struct bpf_insn *fp)
{
	size_t last = MAX_INSNS - 1;

	while (last > 0 && fp[last].code == 0 && fp[last].imm == 0)
		last--;

	return last + 1;
}
/*
 * Patch the immediate of one instruction with the raw bytes of a sysctl value.
 *
 * The first buf_len bytes of buf are packed into a 32-bit word, byte i going
 * to bits [8*i, 8*i + 7], and the word is stored in prog[insn_num].imm.
 *
 * Returns 0 on success, or -1 (after logging) when buf_len exceeds the four
 * bytes an immediate can hold.
 */
static int fixup_sysctl_value(const char *buf, size_t buf_len,
			      struct bpf_insn *prog, size_t insn_num)
{
	uint32_t value_num = 0;
	size_t i;

	if (buf_len > sizeof(value_num)) {
		/* buf_len is a size_t, so %zu is the matching conversion. */
		log_err("Value is too big (%zu) to use in fixup", buf_len);
		return -1;
	}

	for (i = 0; i < buf_len; ++i) {
		/*
		 * Widen to uint32_t before shifting. A plain byte is promoted
		 * to signed int, and shifting a value >= 0x80 left by 24 bits
		 * would overflow it, which is undefined behaviour.
		 */
		value_num |= (uint32_t)(unsigned char)buf[i] << (i * 8);
	}

	prog[insn_num].imm = value_num;

	return 0;
}
/*
 * Load the inline instruction array of a test as a cgroup-sysctl program.
 *
 * When the test sets fixup_value_insn, the current contents of sysctl_path
 * are read first and patched into that instruction via fixup_sysctl_value().
 *
 * Returns whatever bpf_load_program_xattr() returns, or -1 if the fixup step
 * fails. A load failure is logged together with the verifier log unless the
 * test expects LOAD_REJECT.
 */
static int load_sysctl_prog_insns(struct sysctl_test *test,
				  const char *sysctl_path)
{
	struct bpf_load_program_attr attr;
	struct bpf_insn *prog = test->insns;
	int prog_fd;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
	attr.insns = prog;
	attr.insns_cnt = probe_prog_length(prog);
	attr.license = "GPL";

	if (test->fixup_value_insn) {
		char value[128];
		ssize_t nread;
		int sysctl_fd;

		sysctl_fd = open(sysctl_path, O_RDONLY | O_CLOEXEC);
		if (sysctl_fd < 0) {
			log_err("open(%s) failed", sysctl_path);
			return -1;
		}

		nread = read(sysctl_fd, value, sizeof(value));
		if (nread == -1) {
			/* Log before close() so errno still describes read(). */
			log_err("read(%s) failed", sysctl_path);
			close(sysctl_fd);
			return -1;
		}
		close(sysctl_fd);

		if (fixup_sysctl_value(value, nread, prog,
				       test->fixup_value_insn) != 0)
			return -1;
	}

	prog_fd = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
	if (prog_fd >= 0 || test->result == LOAD_REJECT)
		return prog_fd;

	log_err(">>> Loading program error.\n"
		">>> Verifier output:\n%s\n-------\n", bpf_log_buf);

	return prog_fd;
}
/*
 * Load the program named by test->prog_file as a cgroup-sysctl program.
 *
 * Returns the program fd on success and -1 on failure. The failure is logged
 * unless the test expects LOAD_REJECT.
 */
static int load_sysctl_prog_file(struct sysctl_test *test)
{
	struct bpf_prog_load_attr attr;
	struct bpf_object *obj;
	int prog_fd;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
	attr.file = test->prog_file;

	if (bpf_prog_load_xattr(&attr, &obj, &prog_fd) == 0)
		return prog_fd;

	if (test->result != LOAD_REJECT) {
		log_err(">>> Loading program (%s) error.\n",
			test->prog_file);
	}

	return -1;
}
/*
 * Load the program for a test case: from an object file when prog_file is
 * set, otherwise from the inline instruction array. Returns the loader's
 * result unchanged.
 */
static int load_sysctl_prog(struct sysctl_test *test, const char *sysctl_path)
{
	if (test->prog_file)
		return load_sysctl_prog_file(test);

	return load_sysctl_prog_insns(test, sysctl_path);
}
/*
 * Perform the sysctl access a test case describes.
 *
 * For O_RDONLY the file is read once and, if the test provides oldval, the
 * data must start with that string. For O_WRONLY the test's newval is written.
 * Any other open_flags value is treated as an error.
 *
 * Returns 0 on success, the negative result of open() if the file cannot be
 * opened, and -1 otherwise. The caller checks errno after a failure, so errno
 * is preserved across the final close().
 */
static int access_sysctl(const char *sysctl_path,
			 const struct sysctl_test *test)
{
	int saved_errno;
	int err = 0;
	int fd;

	fd = open(sysctl_path, test->open_flags | O_CLOEXEC);
	if (fd < 0)
		return fd;

	if (test->open_flags == O_RDONLY) {
		char buf[128];
		ssize_t len;

		/* Keep one byte spare so the data can be NUL-terminated. */
		len = read(fd, buf, sizeof(buf) - 1);
		if (len < 0)
			goto err;
		/*
		 * Terminate before using buf as a string: strncmp() and the
		 * "%s" below would otherwise run into uninitialized bytes.
		 */
		buf[len] = '\0';

		if (test->oldval &&
		    strncmp(buf, test->oldval, strlen(test->oldval))) {
			log_err("Read value %s != %s", buf, test->oldval);
			goto err;
		}
	} else if (test->open_flags == O_WRONLY) {
		if (!test->newval) {
			log_err("New value for sysctl is not set");
			goto err;
		}
		if (write(fd, test->newval, strlen(test->newval)) == -1)
			goto err;
	} else {
		log_err("Unexpected sysctl access: neither read nor write");
		goto err;
	}

	goto out;
err:
	err = -1;
out:
	/* Do not let close() overwrite the errno of the failed access. */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;

	return err;
}
/*
 * Run one test case against the cgroup referred to by cgfd.
 *
 * Steps: load the program, attach it to the cgroup, perform the sysctl
 * access, then compare the outcome with test->result. A failure at a given
 * step counts as a pass only when the test expects exactly that failure
 * (LOAD_REJECT, ATTACH_REJECT, or OP_EPERM with errno == EPERM).
 *
 * Prints the test description and a PASS/FAIL verdict. Returns 0 on pass and
 * -1 on fail.
 */
static int run_test_case(int cgfd, struct sysctl_test *test)
{
	enum bpf_attach_type atype = test->attach_type;
	char sysctl_path[128];
	int progfd = -1;
	int err = 0;

	printf("Test case: %s .. ", test->descr);

	snprintf(sysctl_path, sizeof(sysctl_path), "/proc/sys/%s",
		 test->sysctl);

	progfd = load_sysctl_prog(test, sysctl_path);
	if (progfd < 0) {
		if (test->result == LOAD_REJECT)
			goto out;
		else
			goto err;
	}

	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
		if (test->result == ATTACH_REJECT)
			goto out;
		else
			goto err;
	}

	if (access_sysctl(sysctl_path, test) == -1) {
		if (test->result == OP_EPERM && errno == EPERM)
			goto out;
		else
			goto err;
	}

	/* Everything succeeded, so the test must have expected success. */
	if (test->result != SUCCESS) {
		log_err("Unexpected failure");
		goto err;
	}

	goto out;
err:
	err = -1;
out:
	/*
	 * Clean up only when a program was actually loaded. The guard uses
	 * the same "negative means failure" rule as the load check above, so
	 * close() is never handed an invalid descriptor.
	 */
	if (progfd >= 0) {
		/* Detaching w/o checking return code: best effort attempt. */
		bpf_prog_detach(cgfd, atype);
		close(progfd);
	}

	printf("[%s]\n", err ? "FAIL" : "PASS");
	return err;
}
static int run_tests(int cgfd)
{
int passes = 0;
int fails = 0;
int i;
for (i = 0; i < ARRAY_SIZE(tests); ++i) {
if (run_test_case(cgfd, &tests[i]))
++fails;
else
++passes;
}
printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
return fails ? -1 : 0;
}
/*
 * Entry point: set up the cgroup environment, create CG_PATH and join it,
 * then run all tests against it. The cgroup fd is closed and the environment
 * cleaned up on every path. Returns 0 when everything passed, -1 otherwise.
 */
int main(int argc, char **argv)
{
	int status = -1;
	int cgfd = -1;

	if (setup_cgroup_environment())
		goto cleanup;

	cgfd = create_and_get_cgroup(CG_PATH);
	if (cgfd < 0)
		goto cleanup;

	if (join_cgroup(CG_PATH))
		goto cleanup;

	if (run_tests(cgfd))
		goto cleanup;

	status = 0;
cleanup:
	close(cgfd);
	cleanup_cgroup_environment();

	return status;
}
/*
 * arg4 of bpf_strtoul() points at the stack slot at R10 - 16, which the
 * program never writes. The entry expects the program to be rejected with
 * the .errstr message below.
 */
{
"ARG_PTR_TO_LONG uninitialized",
.insns = {
/* bpf_strtoul arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* bytes 36 30 30 00 in memory order, i.e. "600\0" (little-endian store assumed) */
BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* bpf_strtoul arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* bpf_strtoul arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* bpf_strtoul arg4 (res) */
/* R7 moves to R10 - 16; no store follows, so the slot stays uninitialized */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
/* bpf_strtoul() */
BPF_EMIT_CALL(BPF_FUNC_strtoul),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.result = REJECT,
.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
.errstr = "invalid indirect read from stack off -16+0 size 8",
},
/*
 * arg4 of bpf_strtoul() points at R10 - 16, but only the first 4 bytes of
 * that 8-byte slot are written. The entry expects rejection with the .errstr
 * message below, which names offset -16+4.
 */
{
"ARG_PTR_TO_LONG half-uninitialized",
.insns = {
/* bpf_strtoul arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* bytes 36 30 30 00 in memory order, i.e. "600\0" (little-endian store assumed) */
BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* bpf_strtoul arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* bpf_strtoul arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* bpf_strtoul arg4 (res) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* BPF_W store: only 4 of the 8 bytes at R10 - 16 get initialized */
BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
/* bpf_strtoul() */
BPF_EMIT_CALL(BPF_FUNC_strtoul),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.result = REJECT,
.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
.errstr = "invalid indirect read from stack off -16+4 size 8",
},
/*
 * arg4 of bpf_strtoul() points at R10 - 20. All 8 bytes starting there are
 * written, but the address is not a multiple of 8. The entry expects
 * rejection with the misaligned-access .errstr message below.
 */
{
"ARG_PTR_TO_LONG misaligned",
.insns = {
/* bpf_strtoul arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* bytes 36 30 30 00 in memory order, i.e. "600\0" (little-endian store assumed) */
BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* bpf_strtoul arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* bpf_strtoul arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* bpf_strtoul arg4 (res) */
/* R7 = R10 - 8 - 12 = R10 - 20 */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -12),
BPF_MOV64_IMM(BPF_REG_0, 0),
/* zero R10 - 20 .. R10 - 17, then R10 - 16 .. R10 - 9 */
BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
/* bpf_strtoul() */
BPF_EMIT_CALL(BPF_FUNC_strtoul),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.result = REJECT,
.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
.errstr = "misaligned stack access off (0x0; 0x0)+-20+0 size 8",
},
/*
 * arg4 of bpf_strtoul() points at R10 - 4, so only 4 bytes lie between the
 * pointer and R10 while the helper needs 8. The entry expects rejection with
 * the .errstr message below (off=-4, access_size=8).
 */
{
"ARG_PTR_TO_LONG size < sizeof(long)",
.insns = {
/* bpf_strtoul arg1 (buf) */
/* the buffer sits at R10 - 16 here */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
/* bytes 36 30 30 00 in memory order, i.e. "600\0" (little-endian store assumed) */
BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* bpf_strtoul arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* bpf_strtoul arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* bpf_strtoul arg4 (res) */
/* R7 = R10 - 16 + 12 = R10 - 4; the 4 bytes there are initialized */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 12),
BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
/* bpf_strtoul() */
BPF_EMIT_CALL(BPF_FUNC_strtoul),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.result = REJECT,
.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
.errstr = "invalid stack type R4 off=-4 access_size=8",
},
/*
 * Positive case: arg4 of bpf_strtoul() points at R10 - 16 and the full
 * 8-byte slot is written before the call. The entry expects the program to
 * be accepted.
 */
{
"ARG_PTR_TO_LONG initialized",
.insns = {
/* bpf_strtoul arg1 (buf) */
BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* bytes 36 30 30 00 in memory order, i.e. "600\0" (little-endian store assumed) */
BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
/* bpf_strtoul arg2 (buf_len) */
BPF_MOV64_IMM(BPF_REG_2, 4),
/* bpf_strtoul arg3 (flags) */
BPF_MOV64_IMM(BPF_REG_3, 0),
/* bpf_strtoul arg4 (res) */
BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
/* BPF_DW store initializes all 8 bytes at R10 - 16 */
BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
/* bpf_strtoul() */
BPF_EMIT_CALL(BPF_FUNC_strtoul),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
},
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment