Commit aac3fc32, authored by Andrey Ignatov, committed by Daniel Borkmann

bpf: Post-hooks for sys_bind

"Post-hooks" are hooks that are called right before returning from
sys_bind. At this time IP and port are already allocated and no further
changes to `struct sock` can happen before returning from sys_bind, but a
BPF program still has a chance to inspect the socket and change the
sys_bind result.

Specifically, it can inspect which port was allocated and, if the port
doesn't satisfy some policy, the BPF program can force sys_bind to fail
and return EPERM to the user.

Another example of usage is recording the IP:port pair in a map for use
in later calls to sys_connect. E.g. if some TCP server inside a cgroup
was bound to some IP:port_n, that pair can be recorded in a map. Later,
when some TCP client inside the same cgroup tries to connect to
127.0.0.1:port_n, the BPF hook for sys_connect can override the
destination and connect the application to IP:port_n instead of
127.0.0.1:port_n. That helps force all applications inside a cgroup to
use the desired IP without breaking those applications if they e.g. use
localhost to communicate with each other.

== Implementation details ==

Post-hooks are implemented as two new attach types
`BPF_CGROUP_INET4_POST_BIND` and `BPF_CGROUP_INET6_POST_BIND` for
existing prog type `BPF_PROG_TYPE_CGROUP_SOCK`.

Separate attach types for IPv4 and IPv6 are introduced to avoid access
to IPv6 field in `struct sock` from `inet_bind()` and to IPv4 field from
`inet6_bind()` since those fields might not make sense in such cases.
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 622adafb
...@@ -98,16 +98,24 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, ...@@ -98,16 +98,24 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
__ret; \ __ret; \
}) })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ #define BPF_CGROUP_RUN_SK_PROG(sk, type) \
({ \ ({ \
int __ret = 0; \ int __ret = 0; \
if (cgroup_bpf_enabled) { \ if (cgroup_bpf_enabled) { \
__ret = __cgroup_bpf_run_filter_sk(sk, \ __ret = __cgroup_bpf_run_filter_sk(sk, type); \
BPF_CGROUP_INET_SOCK_CREATE); \
} \ } \
__ret; \ __ret; \
}) })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \ #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
({ \ ({ \
int __ret = 0; \ int __ret = 0; \
...@@ -183,6 +191,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } ...@@ -183,6 +191,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
......
...@@ -152,6 +152,8 @@ enum bpf_attach_type { ...@@ -152,6 +152,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_BIND, BPF_CGROUP_INET6_BIND,
BPF_CGROUP_INET4_CONNECT, BPF_CGROUP_INET4_CONNECT,
BPF_CGROUP_INET6_CONNECT, BPF_CGROUP_INET6_CONNECT,
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
__MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
}; };
...@@ -948,6 +950,15 @@ struct bpf_sock { ...@@ -948,6 +950,15 @@ struct bpf_sock {
__u32 protocol; __u32 protocol;
__u32 mark; __u32 mark;
__u32 priority; __u32 priority;
__u32 src_ip4; /* Allows 1,2,4-byte read.
* Stored in network byte order.
*/
__u32 src_ip6[4]; /* Allows 1,2,4-byte read.
* Stored in network byte order.
*/
__u32 src_port; /* Allows 4-byte read.
* Stored in host byte order
*/
}; };
#define XDP_PACKET_HEADROOM 256 #define XDP_PACKET_HEADROOM 256
......
...@@ -1171,11 +1171,46 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, ...@@ -1171,11 +1171,46 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
} }
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
/* Historically every BPF program could be loaded without an
 * expected_attach_type. Some program types later started requiring it at
 * load time so the program can be validated properly, while still having to
 * accept loads that omit it for backward compatibility.
 *
 * bpf_prog_load_fixup_attach_type() fills in the default
 * expected_attach_type in @attr for such program types when user space left
 * it unset, so that the value can be validated later at attach time.
 *
 * Currently only BPF_PROG_TYPE_CGROUP_SOCK is handled; its default is
 * BPF_CGROUP_INET_SOCK_CREATE. @attr is left untouched in every other case.
 */
static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
{
	/* Only cgroup sock programs have a backward-compatible default. */
	if (attr->prog_type != BPF_PROG_TYPE_CGROUP_SOCK)
		return;

	/* There is no BPF_ATTACH_TYPE_UNSPEC enumerator, so a zero value is
	 * the only way to detect that no attach type was supplied. A non-zero
	 * value was chosen explicitly and must be preserved.
	 */
	if (attr->expected_attach_type)
		return;

	attr->expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
}
static int static int
bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type) enum bpf_attach_type expected_attach_type)
{ {
switch (prog_type) { switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK:
switch (expected_attach_type) {
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
return 0;
default:
return -EINVAL;
}
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
switch (expected_attach_type) { switch (expected_attach_type) {
case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET4_BIND:
...@@ -1195,6 +1230,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, ...@@ -1195,6 +1230,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
enum bpf_attach_type attach_type) enum bpf_attach_type attach_type)
{ {
switch (prog->type) { switch (prog->type) {
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL; return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
default: default:
...@@ -1240,6 +1276,7 @@ static int bpf_prog_load(union bpf_attr *attr) ...@@ -1240,6 +1276,7 @@ static int bpf_prog_load(union bpf_attr *attr)
!capable(CAP_SYS_ADMIN)) !capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
bpf_prog_load_fixup_attach_type(attr);
if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type)) if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
return -EINVAL; return -EINVAL;
...@@ -1489,6 +1526,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) ...@@ -1489,6 +1526,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_SKB; ptype = BPF_PROG_TYPE_CGROUP_SKB;
break; break;
case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
ptype = BPF_PROG_TYPE_CGROUP_SOCK; ptype = BPF_PROG_TYPE_CGROUP_SOCK;
break; break;
case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET4_BIND:
...@@ -1557,6 +1596,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) ...@@ -1557,6 +1596,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_SKB; ptype = BPF_PROG_TYPE_CGROUP_SKB;
break; break;
case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
ptype = BPF_PROG_TYPE_CGROUP_SOCK; ptype = BPF_PROG_TYPE_CGROUP_SOCK;
break; break;
case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET4_BIND:
...@@ -1616,6 +1657,8 @@ static int bpf_prog_query(const union bpf_attr *attr, ...@@ -1616,6 +1657,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_SOCK_OPS:
......
...@@ -4097,30 +4097,80 @@ static bool lwt_is_valid_access(int off, int size, ...@@ -4097,30 +4097,80 @@ static bool lwt_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info); return bpf_skb_is_valid_access(off, size, type, prog, info);
} }
static bool sock_filter_is_valid_access(int off, int size,
enum bpf_access_type type, /* Attach type specific accesses */
const struct bpf_prog *prog, static bool __sock_filter_check_attach_type(int off,
struct bpf_insn_access_aux *info) enum bpf_access_type access_type,
enum bpf_attach_type attach_type)
{ {
if (type == BPF_WRITE) { switch (off) {
switch (off) { case offsetof(struct bpf_sock, bound_dev_if):
case offsetof(struct bpf_sock, bound_dev_if): case offsetof(struct bpf_sock, mark):
case offsetof(struct bpf_sock, mark): case offsetof(struct bpf_sock, priority):
case offsetof(struct bpf_sock, priority): switch (attach_type) {
break; case BPF_CGROUP_INET_SOCK_CREATE:
goto full_access;
default:
return false;
}
case bpf_ctx_range(struct bpf_sock, src_ip4):
switch (attach_type) {
case BPF_CGROUP_INET4_POST_BIND:
goto read_only;
default:
return false;
}
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
switch (attach_type) {
case BPF_CGROUP_INET6_POST_BIND:
goto read_only;
default:
return false;
}
case bpf_ctx_range(struct bpf_sock, src_port):
switch (attach_type) {
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
goto read_only;
default: default:
return false; return false;
} }
} }
read_only:
return access_type == BPF_READ;
full_access:
return true;
}
static bool __sock_filter_check_size(int off, int size,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
if (off < 0 || off + size > sizeof(struct bpf_sock)) switch (off) {
case bpf_ctx_range(struct bpf_sock, src_ip4):
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
}
return size == size_default;
}
static bool sock_filter_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= sizeof(struct bpf_sock))
return false; return false;
/* The verifier guarantees that size > 0. */
if (off % size != 0) if (off % size != 0)
return false; return false;
if (size != sizeof(__u32)) if (!__sock_filter_check_attach_type(off, type,
prog->expected_attach_type))
return false;
if (!__sock_filter_check_size(off, size, info))
return false; return false;
return true; return true;
} }
...@@ -4728,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, ...@@ -4728,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog, u32 *target_size) struct bpf_prog *prog, u32 *target_size)
{ {
struct bpf_insn *insn = insn_buf; struct bpf_insn *insn = insn_buf;
int off;
switch (si->off) { switch (si->off) {
case offsetof(struct bpf_sock, bound_dev_if): case offsetof(struct bpf_sock, bound_dev_if):
...@@ -4783,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, ...@@ -4783,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
break; break;
case offsetof(struct bpf_sock, src_ip4):
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_rcv_saddr,
FIELD_SIZEOF(struct sock_common,
skc_rcv_saddr),
target_size));
break;
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
off = si->off;
off -= offsetof(struct bpf_sock, src_ip6[0]);
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(
struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0],
FIELD_SIZEOF(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]),
target_size) + off);
#else
(void)off;
*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
break;
case offsetof(struct bpf_sock, src_port):
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct sock_common, skc_num),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_num,
FIELD_SIZEOF(struct sock_common,
skc_num),
target_size));
break;
} }
return insn - insn_buf; return insn - insn_buf;
......
...@@ -519,12 +519,18 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, ...@@ -519,12 +519,18 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
inet->inet_saddr = 0; /* Use device */ inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */ /* Make sure we are allowed to bind here. */
if ((snum || !(inet->bind_address_no_port || if (snum || !(inet->bind_address_no_port ||
force_bind_address_no_port)) && force_bind_address_no_port)) {
sk->sk_prot->get_port(sk, snum)) { if (sk->sk_prot->get_port(sk, snum)) {
inet->inet_saddr = inet->inet_rcv_saddr = 0; inet->inet_saddr = inet->inet_rcv_saddr = 0;
err = -EADDRINUSE; err = -EADDRINUSE;
goto out_release_sock; goto out_release_sock;
}
err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
if (err) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
goto out_release_sock;
}
} }
if (inet->inet_rcv_saddr) if (inet->inet_rcv_saddr)
......
...@@ -412,13 +412,20 @@ int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, ...@@ -412,13 +412,20 @@ int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
sk->sk_ipv6only = 1; sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */ /* Make sure we are allowed to bind here. */
if ((snum || !(inet->bind_address_no_port || if (snum || !(inet->bind_address_no_port ||
force_bind_address_no_port)) && force_bind_address_no_port)) {
sk->sk_prot->get_port(sk, snum)) { if (sk->sk_prot->get_port(sk, snum)) {
sk->sk_ipv6only = saved_ipv6only; sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk); inet_reset_saddr(sk);
err = -EADDRINUSE; err = -EADDRINUSE;
goto out; goto out;
}
err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
if (err) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
goto out;
}
} }
if (addr_type != IPV6_ADDR_ANY) if (addr_type != IPV6_ADDR_ANY)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment