Commit 2cf69d3f authored by Alexei Starovoitov

Merge branch 'cgroup-helpers'

Daniel Borkmann says:

====================
This adds various straightforward helper improvements and additions to the
BPF cgroup-based connect(), sendmsg(), recvmsg() and bind-related hooks,
which allow implementing more fine-grained policies and address current
load-balancer limitations we're seeing. For details please see the
individual patches. I've tested them on Kubernetes & Cilium and also added
selftests for the small verifier extension. Thanks!
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents f54a5bba 23599ada
@@ -233,6 +233,7 @@ enum bpf_arg_type {
ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */
ARG_PTR_TO_CTX, /* pointer to context */
ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
@@ -1500,6 +1501,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto;
extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
......
@@ -45,9 +45,14 @@ static inline void sock_update_classid(struct sock_cgroup_data *skcd)
sock_cgroup_set_classid(skcd, classid);
}
static inline u32 __task_get_classid(struct task_struct *task)
{
return task_cls_state(task)->classid;
}
static inline u32 task_get_classid(const struct sk_buff *skb)
{
-u32 classid = task_cls_state(current)->classid;
+u32 classid = __task_get_classid(current);
/* Due to the nature of the classifier it is required to ignore all
* packets originating from softirq context as accessing `current'
......
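The refactoring above matters because task_get_classid() must ignore softirq context, where `current' is unrelated to the skb being classified, while the new process-context helper bpf_get_cgroup_classid_curr() (added in filter.c further below) can always read `current' directly. A minimal sketch of the two call paths, using only names from this diff:

/* skb/classifier path: keeps the softirq guard described in the comment above */
u32 skb_classid = task_get_classid(skb);

/* process-context path used by the new cgroup-hook helper: no guard needed */
u32 curr_classid = __task_get_classid(current);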
@@ -168,6 +168,9 @@ struct net {
#ifdef CONFIG_XFRM
struct netns_xfrm xfrm;
#endif
atomic64_t net_cookie; /* written once */
#if IS_ENABLED(CONFIG_IP_VS)
struct netns_ipvs *ipvs;
#endif
@@ -273,6 +276,8 @@ static inline int check_net(const struct net *net)
void net_drop_ns(void *);
u64 net_gen_cookie(struct net *net);
#else
static inline struct net *get_net(struct net *net)
@@ -300,6 +305,11 @@ static inline int check_net(const struct net *net)
return 1;
}
static inline u64 net_gen_cookie(struct net *net)
{
return 0;
}
#define net_drop_ns NULL
#endif
......
@@ -2950,6 +2950,37 @@ union bpf_attr {
* restricted to raw_tracepoint bpf programs.
* Return
* 0 on success, or a negative error in case of failure.
*
* u64 bpf_get_netns_cookie(void *ctx)
* Description
* Retrieve the cookie (generated by the kernel) of the network
* namespace the input *ctx* is associated with. The network
* namespace cookie remains stable for its lifetime and provides
* a global identifier that can be assumed unique. If *ctx* is
* NULL, then the helper returns the cookie for the initial
network namespace. The cookie itself is very similar to that
of the **bpf_get_socket_cookie**\ () helper, but for network
namespaces instead of sockets.
Return
An 8-byte long opaque number.
*
* u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level)
* Description
Return the id of the cgroup v2 ancestor, at *ancestor_level*,
of the cgroup associated with the current task. The root cgroup
is at *ancestor_level* zero, and each step down the hierarchy
increments the level. If *ancestor_level* equals the level of
the cgroup associated with the current task, the return value
is the same as that of **bpf_get_current_cgroup_id**\ ().
*
The helper is useful for implementing policies based on cgroups
that are higher in the hierarchy than the immediate cgroup
associated with the current task.
*
The format of the returned id and the helper's limitations are
the same as in **bpf_get_current_cgroup_id**\ ().
Return
The id is returned, or 0 in case the id could not be retrieved.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3073,7 +3104,9 @@ union bpf_attr {
FN(jiffies64), \
FN(read_branch_records), \
FN(get_ns_current_pid_tgid), \
-FN(xdp_output),
+FN(xdp_output), \
+FN(get_netns_cookie), \
+FN(get_current_ancestor_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......
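As a usage illustration of the first new helper, here is a minimal sketch of a connect4 hook that no-ops outside one target namespace. It assumes libbpf's bpf_helpers.h declares bpf_get_netns_cookie(), and TARGET_NETNS_COOKIE is a hypothetical constant a loader would discover out of band:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define TARGET_NETNS_COOKIE 1	/* hypothetical, learned from the target netns */

SEC("cgroup/connect4")
int connect4_ns_scoped(struct bpf_sock_addr *ctx)
{
	/* passing NULL instead of ctx would return the init_net cookie */
	if (bpf_get_netns_cookie(ctx) != TARGET_NETNS_COOKIE)
		return 1;	/* foreign netns: allow, policy is a no-op */

	/* ... netns-local policy / load-balancing logic would go here ... */
	return 1;
}

char _license[] SEC("license") = "GPL";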
@@ -2156,6 +2156,7 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
......
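The __weak definitions above give the core a zero-filled fallback for each proto symbol, so references resolve at link time even when the file providing the strong definition is compiled out. A tiny userspace analogue of the same linker behaviour, with made-up names:

/* weak_demo.c: prints 0 unless another object provides a strong answer() */
#include <stdio.h>

__attribute__((weak)) int answer(void)
{
	return 0;	/* fallback used only if no strong definition is linked in */
}

int main(void)
{
	printf("%d\n", answer());
	return 0;
}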
@@ -340,6 +340,24 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
.ret_type = RET_INTEGER,
};
BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level)
{
struct cgroup *cgrp = task_dfl_cgroup(current);
struct cgroup *ancestor;
ancestor = cgroup_ancestor(cgrp, ancestor_level);
if (!ancestor)
return 0;
return cgroup_id(ancestor);
}
const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
.func = bpf_get_current_ancestor_cgroup_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
};
#ifdef CONFIG_CGROUP_BPF
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
......
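A minimal sketch of the ancestor helper from a bind4 hook, restricting bind() to tasks under a given cgroup subtree; POD_LEVEL and ALLOWED_POD_ID are hypothetical values a loader would resolve (e.g. from a cgroup path):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define POD_LEVEL	2	/* hypothetical depth of the subtree root */
#define ALLOWED_POD_ID	42	/* hypothetical cgroup id at that level */

SEC("cgroup/bind4")
int bind4_cgroup_scoped(struct bpf_sock_addr *ctx)
{
	/* the helper returns 0 when the task's cgroup is shallower than POD_LEVEL */
	if (bpf_get_current_ancestor_cgroup_id(POD_LEVEL) != ALLOWED_POD_ID)
		return 0;	/* reject the bind() */

	return 1;		/* allow */
}

char _license[] SEC("license") = "GPL";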
@@ -3461,13 +3461,17 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
expected_type = CONST_PTR_TO_MAP;
if (type != expected_type)
goto err_type;
-} else if (arg_type == ARG_PTR_TO_CTX) {
+} else if (arg_type == ARG_PTR_TO_CTX ||
+           arg_type == ARG_PTR_TO_CTX_OR_NULL) {
expected_type = PTR_TO_CTX;
+if (!(register_is_null(reg) &&
+      arg_type == ARG_PTR_TO_CTX_OR_NULL)) {
if (type != expected_type)
goto err_type;
err = check_ctx_reg(env, reg, regno);
if (err < 0)
return err;
+}
} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
expected_type = PTR_TO_SOCK_COMMON;
/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
......
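In short, ARG_PTR_TO_CTX_OR_NULL makes the verifier accept either the program's context pointer or a register proven to be NULL for such arguments, which is what makes the init_net fallback reachable from BPF, e.g. inside a hook like the connect4 sketch earlier (helper declaration assumed from bpf_helpers.h):

__u64 this_ns = bpf_get_netns_cookie(ctx);	/* netns of the current context */
__u64 init_ns = bpf_get_netns_cookie(NULL);	/* initial network namespace */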
@@ -2642,6 +2642,19 @@ static const struct bpf_func_proto bpf_msg_pop_data_proto = {
.arg4_type = ARG_ANYTHING,
};
#ifdef CONFIG_CGROUP_NET_CLASSID
BPF_CALL_0(bpf_get_cgroup_classid_curr)
{
return __task_get_classid(current);
}
static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
.func = bpf_get_cgroup_classid_curr,
.gpl_only = false,
.ret_type = RET_INTEGER,
};
#endif
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
@@ -4117,6 +4130,18 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
{
return sock_gen_cookie(ctx);
}
static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
.func = bpf_get_socket_cookie_sock,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
{
return sock_gen_cookie(ctx->sk);
@@ -4129,6 +4154,39 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
static u64 __bpf_get_netns_cookie(struct sock *sk)
{
#ifdef CONFIG_NET_NS
return net_gen_cookie(sk ? sk->sk_net.net : &init_net);
#else
return 0;
#endif
}
BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
{
return __bpf_get_netns_cookie(ctx);
}
static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = {
.func = bpf_get_netns_cookie_sock,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
};
BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
{
return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
}
static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
.func = bpf_get_netns_cookie_sock_addr,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
};
BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
{
struct sock *sk = sk_to_full_sk(skb->sk);
@@ -4147,8 +4205,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
-struct bpf_map *, map, u64, flags, void *, data, u64, size)
+BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, u64, flags,
+void *, data, u64, size)
{
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
return -EINVAL;
@@ -4156,8 +4214,8 @@ BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
}
-static const struct bpf_func_proto bpf_sockopt_event_output_proto = {
-.func = bpf_sockopt_event_output,
+static const struct bpf_func_proto bpf_event_output_data_proto = {
+.func = bpf_event_output_data,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
@@ -5954,6 +6012,26 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_sock_proto;
case BPF_FUNC_get_netns_cookie:
return &bpf_get_netns_cookie_sock_proto;
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
case BPF_FUNC_get_current_pid_tgid:
return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_get_current_comm:
return &bpf_get_current_comm_proto;
#ifdef CONFIG_CGROUPS
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
case BPF_FUNC_get_current_ancestor_cgroup_id:
return &bpf_get_current_ancestor_cgroup_id_proto;
#endif
#ifdef CONFIG_CGROUP_NET_CLASSID
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
#endif
default:
return bpf_base_func_proto(func_id);
}
@@ -5978,8 +6056,26 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_sock_addr_proto;
case BPF_FUNC_get_netns_cookie:
return &bpf_get_netns_cookie_sock_addr_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
case BPF_FUNC_get_current_pid_tgid:
return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_get_current_comm:
return &bpf_get_current_comm_proto;
#ifdef CONFIG_CGROUPS
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
case BPF_FUNC_get_current_ancestor_cgroup_id:
return &bpf_get_current_ancestor_cgroup_id_proto;
#endif
#ifdef CONFIG_CGROUP_NET_CLASSID
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
#endif
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_tcp:
return &bpf_sock_addr_sk_lookup_tcp_proto;
@@ -6222,7 +6318,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
case BPF_FUNC_perf_event_output:
-return &bpf_sockopt_event_output_proto;
+return &bpf_event_output_data_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
......
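Since bpf_event_output_data() now takes a plain void *ctx, the same proto can back perf_event_output for sock, sock_addr and sock_ops programs, and the pid/comm helpers wired up above become usable there as well. A minimal audit sketch from a sendmsg4 hook; the event layout and the events map name are made up:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct conn_event {			/* hypothetical event layout */
	__u64 netns_cookie;
	__u64 pid_tgid;
	char comm[16];
};

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");			/* hypothetical map name */

SEC("cgroup/sendmsg4")
int sendmsg4_audit(struct bpf_sock_addr *ctx)
{
	struct conn_event ev = {};

	ev.netns_cookie = bpf_get_netns_cookie(ctx);
	ev.pid_tgid = bpf_get_current_pid_tgid();
	bpf_get_current_comm(ev.comm, sizeof(ev.comm));

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
			      &ev, sizeof(ev));
	return 1;			/* always allow the sendmsg() */
}

char _license[] SEC("license") = "GPL";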
@@ -69,6 +69,20 @@ EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
static atomic64_t cookie_gen;
u64 net_gen_cookie(struct net *net)
{
while (1) {
u64 res = atomic64_read(&net->net_cookie);
if (res)
return res;
res = atomic64_inc_return(&cookie_gen);
atomic64_cmpxchg(&net->net_cookie, 0, res);
}
}
static struct net_generic *net_alloc_generic(void)
{
struct net_generic *ng;
@@ -1087,6 +1101,7 @@ static int __init net_ns_init(void)
panic("Could not allocate generic netns");
rcu_assign_pointer(init_net.gen, ng);
net_gen_cookie(&init_net);
down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
......
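net_gen_cookie() above is lock-free: it returns the already-assigned cookie if there is one, otherwise it draws a fresh non-zero value from the global counter and publishes it with a compare-and-swap; if another CPU wins the race, the loop re-reads and returns the winner's value, so every reader observes the same cookie. A userspace analogue of the idiom in C11 atomics (names made up):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t cookie_gen;	/* global, monotonically increasing */

uint64_t gen_cookie(_Atomic uint64_t *cookie)
{
	for (;;) {
		uint64_t res = atomic_load(cookie);

		if (res)
			return res;	/* assigned once, stable afterwards */
		res = atomic_fetch_add(&cookie_gen, 1) + 1;	/* never 0 */
		uint64_t expected = 0;
		atomic_compare_exchange_strong(cookie, &expected, res);
		/* on failure, another thread won; loop returns its value */
	}
}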
@@ -2950,6 +2950,37 @@ union bpf_attr {
* restricted to raw_tracepoint bpf programs.
* Return
* 0 on success, or a negative error in case of failure.
*
* u64 bpf_get_netns_cookie(void *ctx)
* Description
* Retrieve the cookie (generated by the kernel) of the network
* namespace the input *ctx* is associated with. The network
* namespace cookie remains stable for its lifetime and provides
* a global identifier that can be assumed unique. If *ctx* is
* NULL, then the helper returns the cookie for the initial
network namespace. The cookie itself is very similar to that
of the **bpf_get_socket_cookie**\ () helper, but for network
namespaces instead of sockets.
Return
An 8-byte long opaque number.
*
* u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level)
* Description
Return the id of the cgroup v2 ancestor, at *ancestor_level*,
of the cgroup associated with the current task. The root cgroup
is at *ancestor_level* zero, and each step down the hierarchy
increments the level. If *ancestor_level* equals the level of
the cgroup associated with the current task, the return value
is the same as that of **bpf_get_current_cgroup_id**\ ().
*
The helper is useful for implementing policies based on cgroups
that are higher in the hierarchy than the immediate cgroup
associated with the current task.
*
The format of the returned id and the helper's limitations are
the same as in **bpf_get_current_cgroup_id**\ ().
Return
The id is returned, or 0 in case the id could not be retrieved.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3073,7 +3104,9 @@ union bpf_attr {
FN(jiffies64), \
FN(read_branch_records), \
FN(get_ns_current_pid_tgid), \
-FN(xdp_output),
+FN(xdp_output), \
+FN(get_netns_cookie), \
+FN(get_current_ancestor_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......
@@ -91,3 +91,108 @@
.result = REJECT,
.errstr = "variable ctx access var_off=(0x0; 0x4)",
},
{
"pass ctx or null check, 1: ctx",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_netns_cookie),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
.result = ACCEPT,
},
{
"pass ctx or null check, 2: null",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_netns_cookie),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
.result = ACCEPT,
},
{
"pass ctx or null check, 3: 1",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 1),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_netns_cookie),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
.result = REJECT,
.errstr = "R1 type=inv expected=ctx",
},
{
"pass ctx or null check, 4: ctx - const",
.insns = {
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_netns_cookie),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
.result = REJECT,
.errstr = "dereference of modified ctx ptr",
},
{
"pass ctx or null check, 5: null (connect)",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_netns_cookie),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
.expected_attach_type = BPF_CGROUP_INET4_CONNECT,
.result = ACCEPT,
},
{
"pass ctx or null check, 6: null (bind)",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_netns_cookie),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
.result = ACCEPT,
},
{
"pass ctx or null check, 7: ctx (bind)",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_socket_cookie),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
.result = ACCEPT,
},
{
"pass ctx or null check, 8: null (bind)",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_socket_cookie),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
.result = REJECT,
.errstr = "R1 type=inv expected=ctx",
},
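Assuming the usual kernel selftest layout, these cases live alongside the existing verifier context tests and are exercised through the test_verifier binary built in tools/testing/selftests/bpf.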