Commit 7828f20e authored by Daniel Borkmann

Merge branch 'bpf-cgroup-bind-connect'

Andrey Ignatov says:

====================
v2->v3:
- rebase due to conflicts
- fix ipv6=m build

v1->v2:
- support expected_attach_type at prog load time so that prog (incl.
  context accesses and calls to helpers) can be validated with regard to
  specific attach point it is supposed to be attached to.
  Later, at attach time, attach type is checked so that it must be same as
  at load time if it was provided
- reworked hooks to rely on expected_attach_type, and reduced number of new
  prog types from 6 to just 1: BPF_PROG_TYPE_CGROUP_SOCK_ADDR
- reused BPF_PROG_TYPE_CGROUP_SOCK for sys_bind post-hooks
- add selftests for post-sys_bind hook

For our container management we've been using complicated and fragile setup
consisting of LD_PRELOAD wrapper intercepting bind and connect calls from
all containerized applications. Unfortunately it doesn't work for apps that
don't use glibc and changing all applications that run in the datacenter
is not possible due to 3rd party code and libraries (despite being
open source code) and sheer amount of legacy code that has to be rewritten
(we're rewriting what we can in parallel)

These applications are written without containers in mind and have
builtin assumptions about network services. Like an application X
expects to connect localhost:special_port and find service Y in there.
To move application X and service Y into two different containers
LD_PRELOAD approach is used to help one service connect to another
without rewriting them.
Moving these two applications into different L2 (netns) or L3 (vrf)
network isolation scopes doesn't help to solve the problem, since
applications need to see each other like they were running on
the host without containers.
So if app X and app Y would run in different netns something
would need to punch a connectivity hole in those namespaces.
That would be real layering violation (with corresponding
network debugging pains), since clean l2, l3 abstraction would
suddenly support something that breaks through the layers.

Instead we used LD_PRELOAD (and now bpf programs) at bind/connect
time to help applications discover and connect to each other.
All applications are running in init_netns and there are no vrfs.
After bind/connect the normal fib/neighbor core networking
logic works as it should always do and the whole system is
clean from network point of view and can be debugged with
standard tools.

We also considered resurrecting Hannes's afnetns work,
but all hierarchical namespace abstractions don't work due
to these builtin networking assumptions inside the apps.
To run an application inside cgroup container that was not written
with containers in mind we have to make an illusion of running
in non-containerized environment.
In some cases we remember the port and container id in the post-bind hook
in a bpf map and when some other task in a different container is trying
to connect to a service we need to know where this service is running.
It can be remote and can be local. Both client and service may or may not
be written with containers in mind and this sockaddr rewrite is providing
connectivity and load balancing feature.

BPF+cgroup looks to be the best solution for this problem.
Hence we introduce 3 hooks:
- at entry into sys_bind and sys_connect
  to let bpf prog look and modify 'struct sockaddr' provided
  by user space and fail bind/connect when appropriate
- post sys_bind after port is allocated

The approach works great and has zero overhead for anyone who doesn't
use it and very low overhead when deployed.

Different use case for this feature is to do low overhead firewall
that doesn't need to inspect all packets and works at bind/connect time.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents 807ae7da 1d436885
......@@ -6,6 +6,7 @@
#include <uapi/linux/bpf.h>
struct sock;
struct sockaddr;
struct cgroup;
struct sk_buff;
struct bpf_sock_ops_kern;
......@@ -63,6 +64,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
int __cgroup_bpf_run_filter_sk(struct sock *sk,
enum bpf_attach_type type);
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
enum bpf_attach_type type);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
enum bpf_attach_type type);
......@@ -93,16 +98,64 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
__ret; \
})
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) { \
__ret = __cgroup_bpf_run_filter_sk(sk, \
BPF_CGROUP_INET_SOCK_CREATE); \
__ret = __cgroup_bpf_run_filter_sk(sk, type); \
} \
__ret; \
})
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
/* Run the cgroup sock_addr filter for attach type @type on socket @sk and
 * the user-supplied address @uaddr.
 *
 * Evaluates to 0 when cgroup_bpf_enabled is false; otherwise evaluates to
 * whatever __cgroup_bpf_run_filter_sock_addr() returns. No socket locking
 * is done here.
 */
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
__ret; \
})
/* Run the cgroup sock_addr filter for attach type @type on socket @sk and
 * the user-supplied address @uaddr, with the call to
 * __cgroup_bpf_run_filter_sock_addr() bracketed by lock_sock()/release_sock().
 *
 * Evaluates to 0 (and does not touch the socket lock) when
 * cgroup_bpf_enabled is false; otherwise evaluates to the filter's return
 * value.
 */
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) { \
lock_sock(sk); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
release_sock(sk); \
} \
__ret; \
})
#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
sk->sk_prot->pre_connect)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
int __ret = 0; \
......@@ -132,9 +185,18 @@ struct cgroup_bpf {};
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
......
......@@ -208,12 +208,15 @@ struct bpf_prog_ops {
struct bpf_verifier_ops {
/* return eBPF function prototype for verification */
const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
const struct bpf_func_proto *
(*get_func_proto)(enum bpf_func_id func_id,
const struct bpf_prog *prog);
/* return true if 'size' wide access at offset 'off' within bpf_context
* with 'type' (read or write) is allowed
*/
bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
const struct bpf_prog *prog);
......
......@@ -8,6 +8,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
......
......@@ -469,6 +469,7 @@ struct bpf_prog {
is_func:1, /* program is a bpf function */
kprobe_override:1; /* Do we override a kprobe? */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
u8 tag[BPF_TAG_SIZE];
......@@ -1020,6 +1021,16 @@ static inline int bpf_tell_extensions(void)
return SKF_AD_MAX;
}
/* Kernel-side context handed to cgroup sock_addr programs; it is built by
 * __cgroup_bpf_run_filter_sock_addr() and passed to the program array.
 */
struct bpf_sock_addr_kern {
struct sock *sk; /* sock struct that will use the sockaddr */
struct sockaddr *uaddr; /* sockaddr struct provided by user */
/* Temporary "register" to make indirect stores to nested structures
 * defined above. We need three registers to make such a store, but
 * only two (src and dst) are available at convert_ctx_access time
 */
u64 tmp_reg;
};
struct bpf_sock_ops_kern {
struct sock *sk;
u32 op;
......
......@@ -231,6 +231,13 @@ struct ipv6_stub {
};
extern const struct ipv6_stub *ipv6_stub __read_mostly;
/* A stub used by bpf helpers. Similarly ugly as ipv6_stub.
 *
 * @inet6_bind takes the same argument list as __inet6_bind(): the sock, the
 * sockaddr and its length, plus the force_bind_address_no_port and
 * with_lock flags.
 * NOTE(review): presumably this indirection exists so built-in bpf code can
 * reach the IPv6 bind path when ipv6 is built as a module — confirm against
 * the code that assigns ipv6_bpf_stub.
 */
struct ipv6_bpf_stub {
int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
bool force_bind_address_no_port, bool with_lock);
};
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
/*
* identify MLD packets for MLD filter exceptions
*/
......
......@@ -32,6 +32,8 @@ int inet_shutdown(struct socket *sock, int how);
int inet_listen(struct socket *sock, int backlog);
void inet_sock_destruct(struct sock *sk);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
bool force_bind_address_no_port, bool with_lock);
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
......
......@@ -1066,6 +1066,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
int inet6_release(struct socket *sock);
int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len,
bool force_bind_address_no_port, bool with_lock);
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer);
......
......@@ -1026,6 +1026,9 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size)
struct proto {
void (*close)(struct sock *sk,
long timeout);
int (*pre_connect)(struct sock *sk,
struct sockaddr *uaddr,
int addr_len);
int (*connect)(struct sock *sk,
struct sockaddr *uaddr,
int addr_len);
......
......@@ -273,6 +273,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
int udp_rcv(struct sk_buff *skb);
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int udp_init_sock(struct sock *sk);
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int __udp_disconnect(struct sock *sk, int flags);
int udp_disconnect(struct sock *sk, int flags);
__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
......
......@@ -136,6 +136,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_CGROUP_DEVICE,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
};
enum bpf_attach_type {
......@@ -147,6 +148,12 @@ enum bpf_attach_type {
BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
BPF_SK_MSG_VERDICT,
BPF_CGROUP_INET4_BIND,
BPF_CGROUP_INET6_BIND,
BPF_CGROUP_INET4_CONNECT,
BPF_CGROUP_INET6_CONNECT,
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
__MAX_BPF_ATTACH_TYPE
};
......@@ -296,6 +303,11 @@ union bpf_attr {
__u32 prog_flags;
char prog_name[BPF_OBJ_NAME_LEN];
__u32 prog_ifindex; /* ifindex of netdev to prep for */
/* For some prog types expected attach type must be known at
* load time to verify attach type specific parts of prog
* (context accesses, allowed helpers, etc).
*/
__u32 expected_attach_type;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
......@@ -736,6 +748,13 @@ union bpf_attr {
* @flags: reserved for future use
* Return: SK_PASS
*
* int bpf_bind(ctx, addr, addr_len)
* Bind socket to address. Only binding to IP is supported, no port can be
* set in addr.
* @ctx: pointer to context of type bpf_sock_addr
* @addr: pointer to struct sockaddr to bind socket to
* @addr_len: length of sockaddr structure
* Return: 0 on success or negative error code
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -801,7 +820,8 @@ union bpf_attr {
FN(msg_redirect_map), \
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
FN(msg_pull_data),
FN(msg_pull_data), \
FN(bind),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......@@ -930,6 +950,15 @@ struct bpf_sock {
__u32 protocol;
__u32 mark;
__u32 priority;
__u32 src_ip4; /* Allows 1,2,4-byte read.
* Stored in network byte order.
*/
__u32 src_ip6[4]; /* Allows 1,2,4-byte read.
* Stored in network byte order.
*/
__u32 src_port; /* Allows 4-byte read.
* Stored in host byte order
*/
};
#define XDP_PACKET_HEADROOM 256
......@@ -1005,6 +1034,26 @@ struct bpf_map_info {
__u64 netns_ino;
} __attribute__((aligned(8)));
/* User bpf_sock_addr struct to access socket fields and the sockaddr struct
 * passed by user and intended to be used by the socket (e.g. to bind to;
 * depends on attach type).
 */
struct bpf_sock_addr {
__u32 user_family; /* Allows 4-byte read, but no write. */
__u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
 * Stored in network byte order.
 */
__u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write.
 * Stored in network byte order.
 */
__u32 user_port; /* Allows 4-byte read and write.
 * Stored in network byte order
 */
__u32 family; /* Allows 4-byte read, but no write */
__u32 type; /* Allows 4-byte read, but no write */
__u32 protocol; /* Allows 4-byte read, but no write */
};
/* User bpf_sock_ops struct to access socket values and specify request ops
* and their replies.
* Some of this fields are in network (bigendian) byte order and may need
......
......@@ -494,6 +494,42 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
/**
 * __cgroup_bpf_run_filter_sock_addr() - Run sock_addr programs for a socket
 *                                       and a user-provided sockaddr
 * @sk: sock struct that will use the sockaddr
 * @uaddr: sockaddr struct provided by user
 * @type: attach type selecting which effective program array to run
 *
 * Only AF_INET and AF_INET6 sockets are filtered; for any other socket
 * family the function returns 0 without running anything.
 *
 * Return: 0 if the program array run yields 1, %-EPERM for any other result.
 */
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				      struct sockaddr *uaddr,
				      enum bpf_attach_type type)
{
	struct bpf_sock_addr_kern ctx = {
		.sk = sk,
		.uaddr = uaddr,
	};
	struct cgroup *cgrp;
	int verdict;

	/* Not every socket represents a network endpoint (e.g. AF_UNIX),
	 * so anything that is not INET/INET6 is let through untouched.
	 */
	switch (sk->sk_family) {
	case AF_INET:
	case AF_INET6:
		break;
	default:
		return 0;
	}

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	verdict = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
				     BPF_PROG_RUN);
	if (verdict != 1)
		return -EPERM;

	return 0;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
/**
* __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
* @sk: socket to get cgroup from
......@@ -545,7 +581,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id)
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
......@@ -566,6 +602,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id)
static bool cgroup_dev_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
......
......@@ -1171,8 +1171,75 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
/* Historically every BPF program could be loaded without an
 * expected_attach_type. Some program types now need it at load time so the
 * program can be validated properly, yet must still load without it for
 * backward compatibility.
 *
 * bpf_prog_load_fixup_attach_type() fills in the default attach type in
 * @attr for such program types when user space left the field unset, so
 * that the value can still be validated later at attach time.
 */
static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
{
	/* Only BPF_PROG_TYPE_CGROUP_SOCK has a backward-compatible default. */
	if (attr->prog_type != BPF_PROG_TYPE_CGROUP_SOCK)
		return;

	/* There is no BPF_ATTACH_TYPE_UNSPEC enumerator, so a zero value is
	 * the only available indication that nothing was specified.
	 */
	if (attr->expected_attach_type)
		return;

	attr->expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
}
static int
bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type)
{
switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK:
switch (expected_attach_type) {
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
return 0;
default:
return -EINVAL;
}
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
switch (expected_attach_type) {
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
return 0;
default:
return -EINVAL;
}
default:
return 0;
}
}
static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
enum bpf_attach_type attach_type)
{
switch (prog->type) {
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
default:
return 0;
}
}
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD prog_ifindex
#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type
static int bpf_prog_load(union bpf_attr *attr)
{
......@@ -1209,11 +1276,17 @@ static int bpf_prog_load(union bpf_attr *attr)
!capable(CAP_SYS_ADMIN))
return -EPERM;
bpf_prog_load_fixup_attach_type(attr);
if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
return -EINVAL;
/* plain bpf_prog allocation */
prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
if (!prog)
return -ENOMEM;
prog->expected_attach_type = attr->expected_attach_type;
prog->aux->offload_requested = !!attr->prog_ifindex;
err = security_bpf_prog_alloc(prog->aux);
......@@ -1453,8 +1526,16 @@ static int bpf_prog_attach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
break;
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
......@@ -1474,6 +1555,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
if (IS_ERR(prog))
return PTR_ERR(prog);
if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
bpf_prog_put(prog);
return -EINVAL;
}
cgrp = cgroup_get_from_fd(attr->target_fd);
if (IS_ERR(cgrp)) {
bpf_prog_put(prog);
......@@ -1510,8 +1596,16 @@ static int bpf_prog_detach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
break;
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
......@@ -1561,6 +1655,12 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
case BPF_CGROUP_INET_SOCK_CREATE:
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_POST_BIND:
case BPF_CGROUP_INET6_POST_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
break;
......
......@@ -1323,7 +1323,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
};
if (env->ops->is_valid_access &&
env->ops->is_valid_access(off, size, t, &info)) {
env->ops->is_valid_access(off, size, t, env->prog, &info)) {
/* A non zero info.ctx_field_size indicates that this field is a
* candidate for later verifier transformation to load the whole
* field and then apply a mask when accessed with a narrower
......@@ -2349,7 +2349,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
if (env->ops->get_func_proto)
fn = env->ops->get_func_proto(func_id);
fn = env->ops->get_func_proto(func_id, env->prog);
if (!fn) {
verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
func_id);
......@@ -3887,6 +3887,7 @@ static int check_return_code(struct bpf_verifier_env *env)
switch (env->prog->type) {
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
break;
......@@ -5572,7 +5573,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn = new_prog->insnsi + i + delta;
}
patch_call_imm:
fn = env->ops->get_func_proto(insn->imm);
fn = env->ops->get_func_proto(insn->imm, env->prog);
/* all functions that have prototype and verifier allowed
* programs to call them, must be real in-kernel functions
*/
......
......@@ -524,7 +524,8 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
.arg3_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
......@@ -568,7 +569,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
}
}
static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
......@@ -582,12 +584,13 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_override_return_proto;
#endif
default:
return tracing_func_proto(func_id);
return tracing_func_proto(func_id, prog);
}
}
/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (off < 0 || off >= sizeof(struct pt_regs))
......@@ -661,7 +664,8 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
.arg3_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
......@@ -669,11 +673,12 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_tp;
default:
return tracing_func_proto(func_id);
return tracing_func_proto(func_id, prog);
}
}
static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
......@@ -721,7 +726,8 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
.arg3_type = ARG_CONST_SIZE,
};
static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
......@@ -731,7 +737,7 @@ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_perf_prog_read_value:
return &bpf_perf_prog_read_value_proto;
default:
return tracing_func_proto(func_id);
return tracing_func_proto(func_id, prog);
}
}
......@@ -781,7 +787,8 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
.arg3_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func_id)
static const struct bpf_func_proto *
raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_perf_event_output:
......@@ -789,12 +796,13 @@ static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_raw_tp;
default:
return tracing_func_proto(func_id);
return tracing_func_proto(func_id, prog);
}
}
static bool raw_tp_prog_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
/* largest tracepoint in the kernel has 12 args */
......@@ -816,6 +824,7 @@ const struct bpf_prog_ops raw_tracepoint_prog_ops = {
};
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_u64 = sizeof(u64);
......
This diff is collapsed.
......@@ -432,23 +432,37 @@ EXPORT_SYMBOL(inet_release);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
unsigned short snum;
int chk_addr_ret;
u32 tb_id = RT_TABLE_LOCAL;
int err;
/* If the socket has its own bind function then use it. (RAW) */
if (sk->sk_prot->bind) {
err = sk->sk_prot->bind(sk, uaddr, addr_len);
goto out;
return sk->sk_prot->bind(sk, uaddr, addr_len);
}
err = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))
goto out;
return -EINVAL;
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
if (err)
return err;
return __inet_bind(sk, uaddr, addr_len, false, true);
}
EXPORT_SYMBOL(inet_bind);
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
bool force_bind_address_no_port, bool with_lock)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
unsigned short snum;
int chk_addr_ret;
u32 tb_id = RT_TABLE_LOCAL;
int err;
if (addr->sin_family != AF_INET) {
/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
......@@ -492,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* would be illegal to use them (multicast/broadcast) in
* which case the sending device address is used.
*/
lock_sock(sk);
if (with_lock)
lock_sock(sk);
/* Check these errors (active socket, double bind). */
err = -EINVAL;
......@@ -504,11 +519,18 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
if ((snum || !inet->bind_address_no_port) &&
sk->sk_prot->get_port(sk, snum)) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
err = -EADDRINUSE;
goto out_release_sock;
if (snum || !(inet->bind_address_no_port ||
force_bind_address_no_port)) {
if (sk->sk_prot->get_port(sk, snum)) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
err = -EADDRINUSE;
goto out_release_sock;
}
err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
if (err) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
goto out_release_sock;
}
}
if (inet->inet_rcv_saddr)
......@@ -521,22 +543,29 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
err = 0;
out_release_sock:
release_sock(sk);
if (with_lock)
release_sock(sk);
out:
return err;
}
EXPORT_SYMBOL(inet_bind);
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sock *sk = sock->sk;
int err;
if (addr_len < sizeof(uaddr->sa_family))
return -EINVAL;
if (uaddr->sa_family == AF_UNSPEC)
return sk->sk_prot->disconnect(sk, flags);
if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
if (err)
return err;
}
if (!inet_sk(sk)->inet_num && inet_autobind(sk))
return -EAGAIN;
return sk->sk_prot->connect(sk, uaddr, addr_len);
......@@ -617,6 +646,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (sk->sk_state != TCP_CLOSE)
goto out;
if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
if (err)
goto out;
}
err = sk->sk_prot->connect(sk, uaddr, addr_len);
if (err < 0)
goto out;
......
......@@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
/* pre_connect hook for TCP over IPv4: runs the cgroup INET4_CONNECT
 * sock_addr programs on @uaddr before the actual connect.
 *
 * Return: -EINVAL if @addr_len is too short for a struct sockaddr_in,
 * otherwise the result of BPF_CGROUP_RUN_PROG_INET4_CONNECT().
 */
static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	int err = -EINVAL;

	/* Same length check as tcp_v4_connect(): it keeps the BPF program
	 * run below from accessing bytes beyond what the user specified in
	 * addr_len.
	 */
	if (addr_len >= sizeof(struct sockaddr_in)) {
		sock_owned_by_me(sk);
		err = BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
	}

	return err;
}
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
......@@ -2409,6 +2424,7 @@ struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
.pre_connect = tcp_v4_pre_connect,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
......
......@@ -1658,6 +1658,19 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
goto try_again;
}
/* pre_connect hook for UDP over IPv4: runs the cgroup INET4_CONNECT
 * sock_addr programs on @uaddr via the _LOCK variant of the run macro.
 *
 * Return: -EINVAL if @addr_len is too short for a struct sockaddr_in,
 * otherwise the result of BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK().
 */
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	int err = -EINVAL;

	/* Same length check as __ip4_datagram_connect(): it keeps the BPF
	 * program run below from accessing bytes beyond what the user
	 * specified in addr_len.
	 */
	if (addr_len >= sizeof(struct sockaddr_in))
		err = BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);

	return err;
}
EXPORT_SYMBOL(udp_pre_connect);
int __udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
......@@ -2530,6 +2543,7 @@ struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udp_pre_connect,
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
......
......@@ -277,15 +277,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
/* bind for INET6 API */
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
bool saved_ipv6only;
int addr_type = 0;
int err = 0;
/* If the socket has its own bind function then use it. */
......@@ -295,11 +287,35 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
if (err)
return err;
return __inet6_bind(sk, uaddr, addr_len, false, true);
}
EXPORT_SYMBOL(inet6_bind);
int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
bool force_bind_address_no_port, bool with_lock)
{
struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
bool saved_ipv6only;
int addr_type = 0;
int err = 0;
if (addr->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
addr_type = ipv6_addr_type(&addr->sin6_addr);
if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
return -EINVAL;
snum = ntohs(addr->sin6_port);
......@@ -307,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
lock_sock(sk);
if (with_lock)
lock_sock(sk);
/* Check these errors (active socket, double bind). */
if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
......@@ -395,12 +412,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
if ((snum || !inet->bind_address_no_port) &&
sk->sk_prot->get_port(sk, snum)) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
if (snum || !(inet->bind_address_no_port ||
force_bind_address_no_port)) {
if (sk->sk_prot->get_port(sk, snum)) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
}
err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
if (err) {
sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
goto out;
}
}
if (addr_type != IPV6_ADDR_ANY)
......@@ -411,13 +436,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = 0;
inet->inet_daddr = 0;
out:
release_sock(sk);
if (with_lock)
release_sock(sk);
return err;
out_unlock:
rcu_read_unlock();
goto out;
}
EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
{
......@@ -869,6 +894,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
};
/* IPv6 entry points made reachable through the ipv6_bpf_stub pointer, which
 * inet6_init() sets to this table. The only entry is the bind implementation,
 * __inet6_bind().
 */
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
.inet6_bind = __inet6_bind,
};
static int __init inet6_init(void)
{
struct list_head *r;
......@@ -1025,6 +1054,7 @@ static int __init inet6_init(void)
/* ensure that ipv6 stubs are visible only after ipv6 is ready */
wmb();
ipv6_stub = &ipv6_stub_impl;
ipv6_bpf_stub = &ipv6_bpf_stub_impl;
out:
return err;
......
......@@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
ipv6_hdr(skb)->saddr.s6_addr32);
}
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	int ret = -EINVAL;

	/* Same length test as tcp_v6_connect(): it keeps the BPF program run
	 * below from reading bytes past the addr_len given by the user.
	 */
	if (addr_len >= SIN6_LEN_RFC2133) {
		sock_owned_by_me(sk);
		ret = BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
	}

	return ret;
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
......@@ -1925,6 +1940,7 @@ struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
.close = tcp_close,
.pre_connect = tcp_v6_pre_connect,
.connect = tcp_v6_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
......
......@@ -957,6 +957,25 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
}
}
static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
/* The following checks are replicated from __ip6_datagram_connect()
* and intended to prevent BPF program called below from accessing
* bytes that are out of the bound specified by user in addr_len.
*/
if (uaddr->sa_family == AF_INET) {
if (__ipv6_only_sock(sk))
return -EAFNOSUPPORT;
return udp_pre_connect(sk, uaddr, addr_len);
}
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
}
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
......@@ -1512,6 +1531,7 @@ struct proto udpv6_prot = {
.name = "UDPv6",
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udpv6_pre_connect,
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
......
......@@ -136,6 +136,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_CGROUP_DEVICE,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
};
enum bpf_attach_type {
......@@ -147,6 +148,12 @@ enum bpf_attach_type {
BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_DEVICE,
BPF_SK_MSG_VERDICT,
BPF_CGROUP_INET4_BIND,
BPF_CGROUP_INET6_BIND,
BPF_CGROUP_INET4_CONNECT,
BPF_CGROUP_INET6_CONNECT,
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
__MAX_BPF_ATTACH_TYPE
};
......@@ -296,6 +303,11 @@ union bpf_attr {
__u32 prog_flags;
char prog_name[BPF_OBJ_NAME_LEN];
__u32 prog_ifindex; /* ifindex of netdev to prep for */
/* For some prog types expected attach type must be known at
* load time to verify attach type specific parts of prog
* (context accesses, allowed helpers, etc).
*/
__u32 expected_attach_type;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
......@@ -736,6 +748,13 @@ union bpf_attr {
* @flags: reserved for future use
* Return: SK_PASS
*
* int bpf_bind(ctx, addr, addr_len)
* Bind socket to address. Only binding to IP is supported, no port can be
* set in addr.
* @ctx: pointer to context of type bpf_sock_addr
* @addr: pointer to struct sockaddr to bind socket to
* @addr_len: length of sockaddr structure
* Return: 0 on success or negative error code
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -801,7 +820,8 @@ union bpf_attr {
FN(msg_redirect_map), \
FN(msg_apply_bytes), \
FN(msg_cork_bytes), \
FN(msg_pull_data),
FN(msg_pull_data), \
FN(bind),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
......@@ -929,6 +949,15 @@ struct bpf_sock {
__u32 protocol;
__u32 mark;
__u32 priority;
__u32 src_ip4; /* Allows 1,2,4-byte read.
* Stored in network byte order.
*/
__u32 src_ip6[4]; /* Allows 1,2,4-byte read.
* Stored in network byte order.
*/
__u32 src_port; /* Allows 4-byte read.
* Stored in host byte order
*/
};
#define XDP_PACKET_HEADROOM 256
......@@ -1004,6 +1033,26 @@ struct bpf_map_info {
__u64 netns_ino;
} __attribute__((aligned(8)));
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depending on
* the attach type).
*/
struct bpf_sock_addr {
__u32 user_family; /* Allows 4-byte read, but no write. */
__u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
* Stored in network byte order.
*/
__u32 user_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write.
* Stored in network byte order.
*/
__u32 user_port; /* Allows 4-byte read and write.
* Stored in network byte order
*/
__u32 family; /* Allows 4-byte read, but no write */
__u32 type; /* Allows 4-byte read, but no write */
__u32 protocol; /* Allows 4-byte read, but no write */
};
/* User bpf_sock_ops struct to access socket values and specify request ops
* and their replies.
* Some of this fields are in network (bigendian) byte order and may need
......
......@@ -146,26 +146,30 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
-1);
}
int bpf_load_program_name(enum bpf_prog_type type, const char *name,
const struct bpf_insn *insns,
size_t insns_cnt, const char *license,
__u32 kern_version, char *log_buf,
size_t log_buf_sz)
int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
char *log_buf, size_t log_buf_sz)
{
int fd;
union bpf_attr attr;
__u32 name_len = name ? strlen(name) : 0;
__u32 name_len;
int fd;
if (!load_attr)
return -EINVAL;
name_len = load_attr->name ? strlen(load_attr->name) : 0;
bzero(&attr, sizeof(attr));
attr.prog_type = type;
attr.insn_cnt = (__u32)insns_cnt;
attr.insns = ptr_to_u64(insns);
attr.license = ptr_to_u64(license);
attr.prog_type = load_attr->prog_type;
attr.expected_attach_type = load_attr->expected_attach_type;
attr.insn_cnt = (__u32)load_attr->insns_cnt;
attr.insns = ptr_to_u64(load_attr->insns);
attr.license = ptr_to_u64(load_attr->license);
attr.log_buf = ptr_to_u64(NULL);
attr.log_size = 0;
attr.log_level = 0;
attr.kern_version = kern_version;
memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
attr.kern_version = load_attr->kern_version;
memcpy(attr.prog_name, load_attr->name,
min(name_len, BPF_OBJ_NAME_LEN - 1));
fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
if (fd >= 0 || !log_buf || !log_buf_sz)
......@@ -184,8 +188,18 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
__u32 kern_version, char *log_buf,
size_t log_buf_sz)
{
return bpf_load_program_name(type, NULL, insns, insns_cnt, license,
kern_version, log_buf, log_buf_sz);
struct bpf_load_program_attr load_attr;
memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
load_attr.prog_type = type;
load_attr.expected_attach_type = 0;
load_attr.name = NULL;
load_attr.insns = insns;
load_attr.insns_cnt = insns_cnt;
load_attr.license = license;
load_attr.kern_version = kern_version;
return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
}
int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
......
......@@ -41,13 +41,20 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
int key_size, int inner_map_fd, int max_entries,
__u32 map_flags);
/* Parameters for bpf_load_program_xattr(). Each field is copied into the
 * corresponding union bpf_attr field used for the BPF_PROG_LOAD command.
 */
struct bpf_load_program_attr {
/* Program type to load. */
enum bpf_prog_type prog_type;
/* Attach type the program is expected to be attached with; 0 when unused. */
enum bpf_attach_type expected_attach_type;
/* Optional program name (may be NULL); at most BPF_OBJ_NAME_LEN - 1 bytes
 * of it are copied.
 */
const char *name;
/* Instruction array and its length, counted in instructions. */
const struct bpf_insn *insns;
size_t insns_cnt;
/* License string handed to the kernel. */
const char *license;
__u32 kern_version;
};
/* Recommend log buffer size */
#define BPF_LOG_BUF_SIZE (256 * 1024)
int bpf_load_program_name(enum bpf_prog_type type, const char *name,
const struct bpf_insn *insns,
size_t insns_cnt, const char *license,
__u32 kern_version, char *log_buf,
size_t log_buf_sz);
int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
char *log_buf, size_t log_buf_sz);
int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
size_t insns_cnt, const char *license,
__u32 kern_version, char *log_buf,
......
......@@ -203,6 +203,8 @@ struct bpf_program {
struct bpf_object *obj;
void *priv;
bpf_program_clear_priv_t clear_priv;
enum bpf_attach_type expected_attach_type;
};
struct bpf_map {
......@@ -1162,21 +1164,31 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
}
static int
load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns,
int insns_cnt, char *license, u32 kern_version, int *pfd)
load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
const char *name, struct bpf_insn *insns, int insns_cnt,
char *license, u32 kern_version, int *pfd)
{
int ret;
struct bpf_load_program_attr load_attr;
char *log_buf;
int ret;
if (!insns || !insns_cnt)
memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
load_attr.prog_type = type;
load_attr.expected_attach_type = expected_attach_type;
load_attr.name = name;
load_attr.insns = insns;
load_attr.insns_cnt = insns_cnt;
load_attr.license = license;
load_attr.kern_version = kern_version;
if (!load_attr.insns || !load_attr.insns_cnt)
return -EINVAL;
log_buf = malloc(BPF_LOG_BUF_SIZE);
if (!log_buf)
pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
ret = bpf_load_program_name(type, name, insns, insns_cnt, license,
kern_version, log_buf, BPF_LOG_BUF_SIZE);
ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
if (ret >= 0) {
*pfd = ret;
......@@ -1192,18 +1204,18 @@ load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns,
pr_warning("-- BEGIN DUMP LOG ---\n");
pr_warning("\n%s\n", log_buf);
pr_warning("-- END LOG --\n");
} else if (insns_cnt >= BPF_MAXINSNS) {
pr_warning("Program too large (%d insns), at most %d insns\n",
insns_cnt, BPF_MAXINSNS);
} else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
pr_warning("Program too large (%zu insns), at most %d insns\n",
load_attr.insns_cnt, BPF_MAXINSNS);
ret = -LIBBPF_ERRNO__PROG2BIG;
} else {
/* Wrong program type? */
if (type != BPF_PROG_TYPE_KPROBE) {
if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
int fd;
fd = bpf_load_program_name(BPF_PROG_TYPE_KPROBE, name,
insns, insns_cnt, license,
kern_version, NULL, 0);
load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
load_attr.expected_attach_type = 0;
fd = bpf_load_program_xattr(&load_attr, NULL, 0);
if (fd >= 0) {
close(fd);
ret = -LIBBPF_ERRNO__PROGTYPE;
......@@ -1247,8 +1259,9 @@ bpf_program__load(struct bpf_program *prog,
pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
prog->section_name, prog->instances.nr);
}
err = load_program(prog->type, prog->name, prog->insns,
prog->insns_cnt, license, kern_version, &fd);
err = load_program(prog->type, prog->expected_attach_type,
prog->name, prog->insns, prog->insns_cnt,
license, kern_version, &fd);
if (!err)
prog->instances.fds[0] = fd;
goto out;
......@@ -1276,8 +1289,8 @@ bpf_program__load(struct bpf_program *prog,
continue;
}
err = load_program(prog->type, prog->name,
result.new_insn_ptr,
err = load_program(prog->type, prog->expected_attach_type,
prog->name, result.new_insn_ptr,
result.new_insn_cnt,
license, kern_version, &fd);
......@@ -1835,11 +1848,25 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type }
/* Record the attach type that @prog is expected to be attached with; the
 * stored value is later handed to load_program() by bpf_program__load().
 */
static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type)
{
prog->expected_attach_type = type;
}
#define BPF_PROG_SEC_FULL(string, ptype, atype) \
{ string, sizeof(string) - 1, ptype, atype }
#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
#define BPF_SA_PROG_SEC(string, ptype) \
BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
static const struct {
const char *sec;
size_t len;
enum bpf_prog_type prog_type;
enum bpf_attach_type expected_attach_type;
} section_names[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
......@@ -1858,10 +1885,17 @@ static const struct {
BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS),
BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
};
#undef BPF_PROG_SEC
#undef BPF_PROG_SEC_FULL
#undef BPF_SA_PROG_SEC
static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog)
static int bpf_program__identify_section(struct bpf_program *prog)
{
int i;
......@@ -1871,13 +1905,13 @@ static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog)
for (i = 0; i < ARRAY_SIZE(section_names); i++)
if (strncmp(prog->section_name, section_names[i].sec,
section_names[i].len) == 0)
return section_names[i].prog_type;
return i;
err:
pr_warning("failed to guess program type based on section name %s\n",
prog->section_name);
return BPF_PROG_TYPE_UNSPEC;
return -1;
}
int bpf_map__fd(struct bpf_map *map)
......@@ -1976,12 +2010,31 @@ long libbpf_get_error(const void *ptr)
int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd)
{
struct bpf_prog_load_attr attr;
memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
attr.file = file;
attr.prog_type = type;
attr.expected_attach_type = 0;
return bpf_prog_load_xattr(&attr, pobj, prog_fd);
}
int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_object **pobj, int *prog_fd)
{
struct bpf_program *prog, *first_prog = NULL;
enum bpf_attach_type expected_attach_type;
enum bpf_prog_type prog_type;
struct bpf_object *obj;
int section_idx;
int err;
obj = bpf_object__open(file);
if (!attr)
return -EINVAL;
obj = bpf_object__open(attr->file);
if (IS_ERR(obj))
return -ENOENT;
......@@ -1990,15 +2043,23 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
* If type is not specified, try to guess it based on
* section name.
*/
if (type == BPF_PROG_TYPE_UNSPEC) {
type = bpf_program__guess_type(prog);
if (type == BPF_PROG_TYPE_UNSPEC) {
prog_type = attr->prog_type;
expected_attach_type = attr->expected_attach_type;
if (prog_type == BPF_PROG_TYPE_UNSPEC) {
section_idx = bpf_program__identify_section(prog);
if (section_idx < 0) {
bpf_object__close(obj);
return -EINVAL;
}
prog_type = section_names[section_idx].prog_type;
expected_attach_type =
section_names[section_idx].expected_attach_type;
}
bpf_program__set_type(prog, type);
bpf_program__set_type(prog, prog_type);
bpf_program__set_expected_attach_type(prog,
expected_attach_type);
if (prog->idx != obj->efile.text_shndx && !first_prog)
first_prog = prog;
}
......
......@@ -248,6 +248,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path);
long libbpf_get_error(const void *ptr);
/* Parameters for bpf_prog_load_xattr(). */
struct bpf_prog_load_attr {
/* Path of the object file to open. */
const char *file;
/* Program type to apply; BPF_PROG_TYPE_UNSPEC asks the loader to derive the
 * type (and the expected attach type) from the program's section name.
 */
enum bpf_prog_type prog_type;
/* Expected attach type to set on the programs when prog_type is given. */
enum bpf_attach_type expected_attach_type;
};
int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_object **pobj, int *prog_fd);
int bpf_prog_load(const char *file, enum bpf_prog_type type,
struct bpf_object **pobj, int *prog_fd);
......
......@@ -23,21 +23,23 @@ urandom_read: urandom_read.c
# Order corresponds to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_sock_addr
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
sockmap_tcp_msg_prog.o
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o
# Order corresponds to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
test_libbpf.sh \
test_xdp_redirect.sh \
test_xdp_meta.sh \
test_offload.py
test_offload.py \
test_sock_addr.sh
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_libbpf_open
......@@ -51,6 +53,8 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_sock: cgroup_helpers.c
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
.PHONY: force
......
......@@ -94,6 +94,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
(void *) BPF_FUNC_msg_cork_bytes;
static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
(void *) BPF_FUNC_msg_pull_data;
static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
(void *) BPF_FUNC_bind;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
......
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <string.h>
#include <linux/stddef.h>
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <sys/socket.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
#define SRC_REWRITE_IP4 0x7f000004U
#define DST_REWRITE_IP4 0x7f000001U
#define DST_REWRITE_PORT4 4444
int _version SEC("version") = 1;
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
	struct sockaddr_in sa;

	/* Redirect the connection to the rewritten destination. */
	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);

	/* Only datagram and stream sockets get their source rewritten. */
	if (ctx->type != SOCK_DGRAM && ctx->type != SOCK_STREAM)
		return 1;

	/* Rewrite source: bind to the chosen IP, leaving the port unset. */
	memset(&sa, 0, sizeof(sa));
	sa.sin_family = AF_INET;
	sa.sin_port = bpf_htons(0);
	sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);

	/* A failed source bind makes the program return 0 instead of 1. */
	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
		return 0;

	return 1;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <string.h>
#include <linux/stddef.h>
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <sys/socket.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
#define SRC_REWRITE_IP6_0 0
#define SRC_REWRITE_IP6_1 0
#define SRC_REWRITE_IP6_2 0
#define SRC_REWRITE_IP6_3 6
#define DST_REWRITE_IP6_0 0
#define DST_REWRITE_IP6_1 0
#define DST_REWRITE_IP6_2 0
#define DST_REWRITE_IP6_3 1
#define DST_REWRITE_PORT6 6666
int _version SEC("version") = 1;
SEC("cgroup/connect6")
int connect_v6_prog(struct bpf_sock_addr *ctx)
{
	struct sockaddr_in6 sa;

	/* Redirect the connection to the rewritten destination. */
	ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
	ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
	ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
	ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
	ctx->user_port = bpf_htons(DST_REWRITE_PORT6);

	/* Only datagram and stream sockets get their source rewritten. */
	if (ctx->type != SOCK_DGRAM && ctx->type != SOCK_STREAM)
		return 1;

	/* Rewrite source: bind to the chosen IP, leaving the port unset. */
	memset(&sa, 0, sizeof(sa));
	sa.sin6_family = AF_INET6;
	sa.sin6_port = bpf_htons(0);
	sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
	sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
	sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
	sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);

	/* A failed source bind makes the program return 0 instead of 1. */
	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
		return 0;

	return 1;
}
char _license[] SEC("license") = "GPL";
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/filter.h>
#include <bpf/bpf.h>
#include "cgroup_helpers.h"
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#define CG_PATH "/foo"
#define MAX_INSNS 512
char bpf_log_buf[BPF_LOG_BUF_SIZE];
/* One test scenario: a BPF program, how to load and attach it, the socket
 * endpoint to bind afterwards, and the stage at which the scenario is
 * expected to stop. Entries in tests[] fill the fields after .insns
 * positionally, so the member order here must not change.
 */
struct sock_test {
const char *descr;
/* BPF prog properties */
struct bpf_insn insns[MAX_INSNS];
/* Passed at load time as the program's expected attach type. */
enum bpf_attach_type expected_attach_type;
/* Attach type actually used when attaching to (and detaching from) the cgroup. */
enum bpf_attach_type attach_type;
/* Socket properties */
int domain;
int type;
/* Endpoint to bind() to */
const char *ip;
unsigned short port;
/* Expected test result */
enum {
LOAD_REJECT, /* program load must fail */
ATTACH_REJECT, /* load succeeds, attach must fail */
BIND_REJECT, /* bind() must fail with EPERM */
SUCCESS, /* load, attach and bind() all succeed */
} result;
};
static struct sock_test tests[] = {
{
"bind4 load with invalid access: src_ip6",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_ip6[0])),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET4_POST_BIND,
0,
0,
NULL,
0,
LOAD_REJECT,
},
{
"bind4 load with invalid access: mark",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, mark)),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET4_POST_BIND,
0,
0,
NULL,
0,
LOAD_REJECT,
},
{
"bind6 load with invalid access: src_ip4",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_ip4)),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET6_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
0,
0,
NULL,
0,
LOAD_REJECT,
},
{
"sock_create load with invalid access: src_port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_port)),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET_SOCK_CREATE,
BPF_CGROUP_INET_SOCK_CREATE,
0,
0,
NULL,
0,
LOAD_REJECT,
},
{
"sock_create load w/o expected_attach_type (compat mode)",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
0,
BPF_CGROUP_INET_SOCK_CREATE,
AF_INET,
SOCK_STREAM,
"127.0.0.1",
8097,
SUCCESS,
},
{
"sock_create load w/ expected_attach_type",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET_SOCK_CREATE,
BPF_CGROUP_INET_SOCK_CREATE,
AF_INET,
SOCK_STREAM,
"127.0.0.1",
8097,
SUCCESS,
},
{
"attach type mismatch bind4 vs bind6",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
0,
0,
NULL,
0,
ATTACH_REJECT,
},
{
"attach type mismatch bind6 vs bind4",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET6_POST_BIND,
BPF_CGROUP_INET4_POST_BIND,
0,
0,
NULL,
0,
ATTACH_REJECT,
},
{
"attach type mismatch default vs bind4",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
0,
BPF_CGROUP_INET4_POST_BIND,
0,
0,
NULL,
0,
ATTACH_REJECT,
},
{
"attach type mismatch bind6 vs sock_create",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET6_POST_BIND,
BPF_CGROUP_INET_SOCK_CREATE,
0,
0,
NULL,
0,
ATTACH_REJECT,
},
{
"bind4 reject all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET4_POST_BIND,
AF_INET,
SOCK_STREAM,
"0.0.0.0",
0,
BIND_REJECT,
},
{
"bind6 reject all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET6_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
AF_INET6,
SOCK_STREAM,
"::",
0,
BIND_REJECT,
},
{
"bind6 deny specific IP & port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
/* if (ip == expected && port == expected) */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_ip6[3])),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x01000000, 4),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_port)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
/* return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_A(1),
/* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET6_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
AF_INET6,
SOCK_STREAM,
"::1",
8193,
BIND_REJECT,
},
{
"bind4 allow specific IP & port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
/* if (ip == expected && port == expected) */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_ip4)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x0100007F, 4),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock, src_port)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_A(1),
/* else return DENY; */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET4_POST_BIND,
AF_INET,
SOCK_STREAM,
"127.0.0.1",
4098,
SUCCESS,
},
{
"bind4 allow all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET4_POST_BIND,
AF_INET,
SOCK_STREAM,
"0.0.0.0",
0,
SUCCESS,
},
{
"bind6 allow all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
BPF_CGROUP_INET6_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
AF_INET6,
SOCK_STREAM,
"::",
0,
SUCCESS,
},
};
/* Return the number of instructions in the fixed-size array @fp, found by
 * skipping trailing entries whose code and imm are both zero. Always
 * returns at least 1.
 */
static size_t probe_prog_length(const struct bpf_insn *fp)
{
	size_t last = MAX_INSNS - 1;

	while (last > 0 && fp[last].code == 0 && fp[last].imm == 0)
		last--;

	return last + 1;
}
static int load_sock_prog(const struct bpf_insn *prog,
enum bpf_attach_type attach_type)
{
struct bpf_load_program_attr attr;
memset(&attr, 0, sizeof(struct bpf_load_program_attr));
attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
attr.expected_attach_type = attach_type;
attr.insns = prog;
attr.insns_cnt = probe_prog_length(attr.insns);
attr.license = "GPL";
return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
}
/* Attach program @progfd to cgroup @cgfd with @attach_type, using the
 * BPF_F_ALLOW_OVERRIDE flag. Returns the result of bpf_prog_attach().
 */
static int attach_sock_prog(int cgfd, int progfd,
enum bpf_attach_type attach_type)
{
return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
}
/* Create a socket of the given @domain/@type and bind() it to @ip:@port.
 *
 * Only AF_INET and AF_INET6 are supported. The socket is always closed
 * before returning.
 *
 * Returns 0 on success, -1 on any failure. On failure errno is left as set
 * by the failing call (socket(), bind(), ...): the caller inspects it to
 * tell a BPF rejection from other errors, so the cleanup close() must not
 * overwrite it.
 */
static int bind_sock(int domain, int type, const char *ip, unsigned short port)
{
	struct sockaddr_storage addr;
	struct sockaddr_in6 *addr6;
	struct sockaddr_in *addr4;
	int saved_errno;
	int sockfd = -1;
	socklen_t len;
	int err = 0;

	sockfd = socket(domain, type, 0);
	if (sockfd < 0)
		goto err;

	memset(&addr, 0, sizeof(addr));

	if (domain == AF_INET) {
		len = sizeof(struct sockaddr_in);
		addr4 = (struct sockaddr_in *)&addr;
		addr4->sin_family = domain;
		addr4->sin_port = htons(port);
		if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1)
			goto err;
	} else if (domain == AF_INET6) {
		len = sizeof(struct sockaddr_in6);
		addr6 = (struct sockaddr_in6 *)&addr;
		addr6->sin6_family = domain;
		addr6->sin6_port = htons(port);
		if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1)
			goto err;
	} else {
		goto err;
	}

	if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
		goto err;

	goto out;
err:
	err = -1;
out:
	/* Keep the errno of the failing call across cleanup, and do not
	 * close() an fd that was never opened (that would set EBADF).
	 */
	saved_errno = errno;
	if (sockfd >= 0)
		close(sockfd);
	errno = saved_errno;
	return err;
}
/* Run one scenario against cgroup @cgfd: load the program, attach it, then
 * bind a socket, and compare the stage that failed (if any) with the
 * expected test->result.
 *
 * Prints the test description and PASS/FAIL. Returns 0 when the outcome
 * matches the expectation, -1 otherwise.
 */
static int run_test_case(int cgfd, const struct sock_test *test)
{
	int progfd = -1;
	int err = 0;

	printf("Test case: %s .. ", test->descr);
	progfd = load_sock_prog(test->insns, test->expected_attach_type);
	if (progfd < 0) {
		if (test->result == LOAD_REJECT)
			goto out;
		else
			goto err;
	}

	if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
		if (test->result == ATTACH_REJECT)
			goto out;
		else
			goto err;
	}

	if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
		/* sys_bind() may fail for different reasons, errno has to be
		 * checked to confirm that BPF program rejected it.
		 */
		if (test->result == BIND_REJECT && errno == EPERM)
			goto out;
		else
			goto err;
	}

	/* Everything succeeded, which is only right for SUCCESS cases. */
	if (test->result != SUCCESS)
		goto err;

	goto out;
err:
	err = -1;
out:
	/* Clean up only when a program was actually loaded; there is no fd
	 * to detach or close after a rejected load.
	 */
	if (progfd >= 0) {
		/* Detaching w/o checking return code: best effort attempt. */
		bpf_prog_detach(cgfd, test->attach_type);
		close(progfd);
	}
	printf("[%s]\n", err ? "FAIL" : "PASS");
	return err;
}
/* Run every entry of tests[] against cgroup @cgfd and print a summary.
 * Returns 0 when all cases pass, -1 when at least one fails.
 */
static int run_tests(int cgfd)
{
	int total = ARRAY_SIZE(tests);
	int fails = 0;
	int i;

	for (i = 0; i < total; ++i)
		fails += run_test_case(cgfd, &tests[i]) ? 1 : 0;

	printf("Summary: %d PASSED, %d FAILED\n", total - fails, fails);

	return fails ? -1 : 0;
}
/* Set up the cgroup environment, join the test cgroup and run all test
 * cases. The exit status is 0 only if every step and every test succeeds.
 */
int main(int argc, char **argv)
{
	int cgfd = -1;
	int err = -1;	/* assume failure until the tests have passed */

	if (setup_cgroup_environment())
		goto out;

	cgfd = create_and_get_cgroup(CG_PATH);
	if (!cgfd)
		goto out;

	if (join_cgroup(CG_PATH))
		goto out;

	if (run_tests(cgfd) == 0)
		err = 0;

out:
	close(cgfd);
	cleanup_cgroup_environment();
	return err;
}
This diff is collapsed.
#!/bin/sh
set -eu
# Send a single quiet ping (1 second timeout) to the address in $1.
# Any "/prefix-length" suffix on the argument is stripped first.
# The function's status is the exit status of ping.
ping_once()
{
ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1
}
# Wait until both TEST_IPv4 and TEST_IPv6 answer a ping.
# Tries up to MAX_PING_TRIES times, printing a dot per attempt; returns as
# soon as both addresses respond, otherwise prints an error to stderr and
# exits the script with status 1.
wait_for_ip()
{
local _i
echo -n "Wait for testing IPv4/IPv6 to become available "
for _i in $(seq ${MAX_PING_TRIES}); do
echo -n "."
if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then
echo " OK"
return
fi
done
echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
exit 1
}
# Create a veth pair (TEST_IF / TEST_IF_PEER), bring both ends up, assign
# the test IPv4 and IPv6 addresses to TEST_IF, and wait until they respond.
setup()
{
# Create testing interfaces not to interfere with current environment.
ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
ip link set ${TEST_IF} up
ip link set ${TEST_IF_PEER} up
ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
wait_for_ip
}
# Remove the test interfaces. Errors are silenced and ignored ("|| :") so
# that cleanup never fails the script under "set -e", e.g. when an interface
# is already gone.
cleanup()
{
ip link del ${TEST_IF} 2>/dev/null || :
ip link del ${TEST_IF_PEER} 2>/dev/null || :
}
# Entry point: register cleanup for script exit and for signals 2, 3, 6 and
# 15, prepare the test interfaces, then run the test binary.
main()
{
trap cleanup EXIT 2 3 6 15
setup
./test_sock_addr setup_done
}
BASENAME=$(basename $0 .sh)
TEST_IF="${BASENAME}1"
TEST_IF_PEER="${BASENAME}2"
TEST_IPv4="127.0.0.4/8"
TEST_IPv6="::6/128"
MAX_PING_TRIES=5
main
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment