Commit 8fd68207 authored by David S. Miller

Merge branch 'bpf-Add-option-to-set-mark-and-priority-in-cgroup-sock-programs'

David Ahern says:

====================
bpf: Add option to set mark and priority in cgroup sock programs

Add option to set mark and priority in addition to bound device for newly
created sockets. Also, allow the bpf programs to use the get_current_uid_gid
helper meaning socket marks, priority and device can be set based on the
uid/gid of the running process.

Sample programs are updated to demonstrate the new options.

v3
- no changes to Patches 1 and 2 which Alexei acked in previous versions
- dropped change related to recursive programs in a cgroup
- updated tests per dropped patch

v2
- added flag to control recursive behavior as requested by Alexei
- added comment to sock_filter_func_proto regarding use of
  get_current_uid_gid helper
- updated test programs for recursive option
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents e12f1a59 0adc3dd9
...@@ -758,6 +758,8 @@ struct bpf_sock { ...@@ -758,6 +758,8 @@ struct bpf_sock {
__u32 family; __u32 family;
__u32 type; __u32 type;
__u32 protocol; __u32 protocol;
__u32 mark;
__u32 priority;
}; };
#define XDP_PACKET_HEADROOM 256 #define XDP_PACKET_HEADROOM 256
......
...@@ -3149,6 +3149,20 @@ bpf_base_func_proto(enum bpf_func_id func_id) ...@@ -3149,6 +3149,20 @@ bpf_base_func_proto(enum bpf_func_id func_id)
} }
} }
/*
 * Helper lookup for cgroup sock programs.
 *
 * On top of the base helper set, these programs may call
 * bpf_get_current_uid_gid(): inet and inet6 sockets are created in a
 * process context, so a valid uid/gid is always available.
 *
 * Any other helper id is resolved by bpf_base_func_proto().
 */
static const struct bpf_func_proto *
sock_filter_func_proto(enum bpf_func_id func_id)
{
	if (func_id == BPF_FUNC_get_current_uid_gid)
		return &bpf_get_current_uid_gid_proto;

	return bpf_base_func_proto(func_id);
}
static const struct bpf_func_proto * static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id) sk_filter_func_proto(enum bpf_func_id func_id)
{ {
...@@ -3455,6 +3469,10 @@ static bool sock_filter_is_valid_access(int off, int size, ...@@ -3455,6 +3469,10 @@ static bool sock_filter_is_valid_access(int off, int size,
switch (off) { switch (off) {
case offsetof(struct bpf_sock, bound_dev_if): case offsetof(struct bpf_sock, bound_dev_if):
break; break;
case offsetof(struct bpf_sock, mark):
break;
case offsetof(struct bpf_sock, priority):
break;
default: default:
return false; return false;
} }
...@@ -3958,6 +3976,28 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, ...@@ -3958,6 +3976,28 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
offsetof(struct sock, sk_bound_dev_if)); offsetof(struct sock, sk_bound_dev_if));
break; break;
case offsetof(struct bpf_sock, mark):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_mark));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_mark));
break;
case offsetof(struct bpf_sock, priority):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_priority));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_priority));
break;
case offsetof(struct bpf_sock, family): case offsetof(struct bpf_sock, family):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
...@@ -4207,7 +4247,7 @@ const struct bpf_verifier_ops lwt_xmit_prog_ops = { ...@@ -4207,7 +4247,7 @@ const struct bpf_verifier_ops lwt_xmit_prog_ops = {
}; };
const struct bpf_verifier_ops cg_sock_prog_ops = { const struct bpf_verifier_ops cg_sock_prog_ops = {
.get_func_proto = bpf_base_func_proto, .get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access, .is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access, .convert_ctx_access = sock_filter_convert_ctx_access,
}; };
......
...@@ -9,8 +9,13 @@ SEC("cgroup/sock1") ...@@ -9,8 +9,13 @@ SEC("cgroup/sock1")
int bpf_prog1(struct bpf_sock *sk) int bpf_prog1(struct bpf_sock *sk)
{ {
char fmt[] = "socket: family %d type %d protocol %d\n"; char fmt[] = "socket: family %d type %d protocol %d\n";
char fmt2[] = "socket: uid %u gid %u\n";
__u64 gid_uid = bpf_get_current_uid_gid();
__u32 uid = gid_uid & 0xffffffff;
__u32 gid = gid_uid >> 32;
bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
bpf_trace_printk(fmt2, sizeof(fmt2), uid, gid);
/* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets
* ie., make ping6 fail * ie., make ping6 fail
......
...@@ -19,68 +19,271 @@ ...@@ -19,68 +19,271 @@
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <net/if.h> #include <net/if.h>
#include <inttypes.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include "libbpf.h" #include "libbpf.h"
char bpf_log_buf[BPF_LOG_BUF_SIZE]; char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int prog_load(int idx) static int prog_load(__u32 idx, __u32 mark, __u32 prio)
{ {
struct bpf_insn prog[] = { /* save pointer to context */
struct bpf_insn prog_start[] = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
};
struct bpf_insn prog_end[] = {
BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
BPF_EXIT_INSN(),
};
/* set sk_bound_dev_if on socket */
struct bpf_insn prog_dev[] = {
BPF_MOV64_IMM(BPF_REG_3, idx), BPF_MOV64_IMM(BPF_REG_3, idx),
BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)),
BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
BPF_EXIT_INSN(),
}; };
size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, /* set mark on socket */
struct bpf_insn prog_mark[] = {
/* get uid of process */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_current_uid_gid),
BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff),
/* if uid is 0, use given mark, else use the uid as the mark */
BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
BPF_MOV64_IMM(BPF_REG_3, mark),
/* set the mark on the new socket */
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, mark)),
};
/* set priority on socket */
struct bpf_insn prog_prio[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_MOV64_IMM(BPF_REG_3, prio),
BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)),
};
struct bpf_insn *prog;
size_t insns_cnt;
void *p;
int ret;
insns_cnt = sizeof(prog_start) + sizeof(prog_end);
if (idx)
insns_cnt += sizeof(prog_dev);
if (mark)
insns_cnt += sizeof(prog_mark);
if (prio)
insns_cnt += sizeof(prog_prio);
p = prog = malloc(insns_cnt);
if (!prog) {
fprintf(stderr, "Failed to allocate memory for instructions\n");
return EXIT_FAILURE;
}
memcpy(p, prog_start, sizeof(prog_start));
p += sizeof(prog_start);
if (idx) {
memcpy(p, prog_dev, sizeof(prog_dev));
p += sizeof(prog_dev);
}
if (mark) {
memcpy(p, prog_mark, sizeof(prog_mark));
p += sizeof(prog_mark);
}
if (prio) {
memcpy(p, prog_prio, sizeof(prog_prio));
p += sizeof(prog_prio);
}
memcpy(p, prog_end, sizeof(prog_end));
p += sizeof(prog_end);
insns_cnt /= sizeof(struct bpf_insn);
ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt,
"GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
free(prog);
return ret;
}
/*
 * Query the SO_BINDTODEVICE setting of a socket.
 *
 * @sd:   socket descriptor to query
 * @name: caller-supplied buffer that receives the option value; it is
 *        reset to an empty string before the query, so it must hold at
 *        least one byte
 * @len:  size of @name in bytes
 *
 * Returns the getsockopt() result unchanged. A negative value means the
 * query failed; the failure is reported on stderr via perror().
 */
static int get_bind_to_device(int sd, char *name, size_t len)
{
	socklen_t optlen = len;
	int rc;

	/* start from an empty string so callers never print stale data */
	name[0] = '\0';

	rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
	if (rc < 0)
		/* name the call that actually failed (getsockopt, not setsockopt) */
		perror("getsockopt(SO_BINDTODEVICE)");

	return rc;
}
/*
 * Read the SO_MARK option of a socket.
 *
 * @sd: socket descriptor to query
 *
 * Returns the value stored by getsockopt(). If the call fails, the error
 * is reported via perror() and the local value (initialised to 0) is
 * returned as-is.
 */
static unsigned int get_somark(int sd)
{
	unsigned int value = 0;
	socklen_t value_len = sizeof(value);

	if (getsockopt(sd, SOL_SOCKET, SO_MARK, &value, &value_len) < 0)
		perror("getsockopt(SO_MARK)");

	return value;
}
/*
 * Read the SO_PRIORITY option of a socket.
 *
 * @sd: socket descriptor to query
 *
 * Returns the value stored by getsockopt(). If the call fails, the error
 * is reported via perror() and the local value (initialised to 0) is
 * returned as-is.
 */
static unsigned int get_priority(int sd)
{
	unsigned int value = 0;
	socklen_t value_len = sizeof(value);

	if (getsockopt(sd, SOL_SOCKET, SO_PRIORITY, &value, &value_len) < 0)
		perror("getsockopt(SO_PRIORITY)");

	return value;
}
/*
 * Create a datagram socket and print the device, mark and priority it
 * carries right after creation.
 *
 * @family: protocol family passed to socket() (main() passes PF_INET or
 *          PF_INET6)
 *
 * Returns 0 after printing the settings, 1 if the socket could not be
 * created or its bound device could not be queried. The socket is closed
 * on every path once it has been opened.
 */
static int show_sockopts(int family)
{
	unsigned int mark, prio;
	char name[16];	/* presumably sized to IFNAMSIZ - confirm */
	int sd;

	/* protocol 17 is UDP */
	sd = socket(family, SOCK_DGRAM, 17);
	if (sd < 0) {
		perror("socket");
		return 1;
	}

	if (get_bind_to_device(sd, name, sizeof(name)) < 0) {
		/* do not leak the descriptor on the error path */
		close(sd);
		return 1;
	}

	mark = get_somark(sd);
	prio = get_priority(sd);

	close(sd);

	/* sd is only printed as a number here; it is already closed */
	printf("sd %d: dev %s, mark %u, priority %u\n", sd, name, mark, prio);

	return 0;
}
static int usage(const char *argv0) static int usage(const char *argv0)
{ {
printf("Usage: %s cg-path device-index\n", argv0); printf("Usage:\n");
printf(" Attach a program\n");
printf(" %s -b bind-to-dev -m mark -p prio cg-path\n", argv0);
printf("\n");
printf(" Detach a program\n");
printf(" %s -d cg-path\n", argv0);
printf("\n");
printf(" Show inherited socket settings (mark, priority, and device)\n");
printf(" %s [-6]\n", argv0);
return EXIT_FAILURE; return EXIT_FAILURE;
} }
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
__u32 idx = 0, mark = 0, prio = 0;
const char *cgrp_path = NULL;
int cg_fd, prog_fd, ret; int cg_fd, prog_fd, ret;
unsigned int idx; int family = PF_INET;
int do_attach = 1;
if (argc < 2) int rc;
return usage(argv[0]);
idx = if_nametoindex(argv[2]); while ((rc = getopt(argc, argv, "db:m:p:6")) != -1) {
switch (rc) {
case 'd':
do_attach = 0;
break;
case 'b':
idx = if_nametoindex(optarg);
if (!idx) {
idx = strtoumax(optarg, NULL, 0);
if (!idx) { if (!idx) {
printf("Invalid device name\n"); printf("Invalid device name\n");
return EXIT_FAILURE; return EXIT_FAILURE;
} }
}
break;
case 'm':
mark = strtoumax(optarg, NULL, 0);
break;
case 'p':
prio = strtoumax(optarg, NULL, 0);
break;
case '6':
family = PF_INET6;
break;
default:
return usage(argv[0]);
}
}
if (optind == argc)
return show_sockopts(family);
cgrp_path = argv[optind];
if (!cgrp_path) {
fprintf(stderr, "cgroup path not given\n");
return EXIT_FAILURE;
}
cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); if (do_attach && !idx && !mark && !prio) {
fprintf(stderr,
"One of device, mark or priority must be given\n");
return EXIT_FAILURE;
}
cg_fd = open(cgrp_path, O_DIRECTORY | O_RDONLY);
if (cg_fd < 0) { if (cg_fd < 0) {
printf("Failed to open cgroup path: '%s'\n", strerror(errno)); printf("Failed to open cgroup path: '%s'\n", strerror(errno));
return EXIT_FAILURE; return EXIT_FAILURE;
} }
prog_fd = prog_load(idx); if (do_attach) {
printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); prog_fd = prog_load(idx, mark, prio);
if (prog_fd < 0) { if (prog_fd < 0) {
printf("Failed to load prog: '%s'\n", strerror(errno)); printf("Failed to load prog: '%s'\n", strerror(errno));
printf("Output from kernel verifier:\n%s\n-------\n",
bpf_log_buf);
return EXIT_FAILURE; return EXIT_FAILURE;
} }
ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0); ret = bpf_prog_attach(prog_fd, cg_fd,
BPF_CGROUP_INET_SOCK_CREATE, 0);
if (ret < 0) { if (ret < 0) {
printf("Failed to attach prog to cgroup: '%s'\n", printf("Failed to attach prog to cgroup: '%s'\n",
strerror(errno)); strerror(errno));
return EXIT_FAILURE; return EXIT_FAILURE;
} }
} else {
ret = bpf_prog_detach(cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
if (ret < 0) {
printf("Failed to detach prog from cgroup: '%s'\n",
strerror(errno));
return EXIT_FAILURE;
}
}
close(cg_fd);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
#!/bin/bash #!/bin/sh
function config_device { # Test various socket options that can be set by attaching programs to cgroups.
ip netns add at_ns0
ip link add veth0 type veth peer name veth0b CGRP_MNT="/tmp/cgroupv2-test_cgrp2_sock"
ip link set veth0b up
ip link set veth0 netns at_ns0 ################################################################################
ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 #
ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad print_result()
ip netns exec at_ns0 ip link set dev veth0 up {
ip link add foo type vrf table 1234 local rc=$1
ip link set foo up local status=" OK "
ip addr add 172.16.1.101/24 dev veth0b
ip addr add 2401:db00::2/64 dev veth0b nodad [ $rc -ne 0 ] && status="FAIL"
ip link set veth0b master foo
printf "%-50s [%4s]\n" "$2" "$status"
} }
function attach_bpf { check_sock()
rm -rf /tmp/cgroupv2 {
mkdir -p /tmp/cgroupv2 out=$(test_cgrp2_sock)
mount -t cgroup2 none /tmp/cgroupv2 echo $out | grep -q "$1"
mkdir -p /tmp/cgroupv2/foo if [ $? -ne 0 ]; then
test_cgrp2_sock /tmp/cgroupv2/foo foo print_result 1 "IPv4: $2"
echo $$ >> /tmp/cgroupv2/foo/cgroup.procs echo " expected: $1"
echo " have: $out"
rc=1
else
print_result 0 "IPv4: $2"
fi
} }
function cleanup { check_sock6()
set +ex {
ip netns delete at_ns0 out=$(test_cgrp2_sock -6)
ip link del veth0 echo $out | grep -q "$1"
ip link del foo if [ $? -ne 0 ]; then
umount /tmp/cgroupv2 print_result 1 "IPv6: $2"
rm -rf /tmp/cgroupv2 echo " expected: $1"
set -ex echo " have: $out"
rc=1
else
print_result 0 "IPv6: $2"
fi
} }
function do_test { ################################################################################
ping -c1 -w1 172.16.1.100 #
ping6 -c1 -w1 2401:db00::1
cleanup()
{
echo $$ >> ${CGRP_MNT}/cgroup.procs
rmdir ${CGRP_MNT}/sockopts
} }
# Remove the test fixtures and terminate the script.
#   $1 - exit status to terminate with
#   $2 - optional message; when non-empty it is printed with an
#        "ERROR: " prefix before teardown
cleanup_and_exit()
{
	local exit_code=$1
	local errmsg="$2"

	if [ -n "$errmsg" ]; then
		echo "ERROR: $errmsg"
	fi

	# delete the cgrp2_sock link and unmount the cgroup mount point
	ip li del cgrp2_sock
	umount ${CGRP_MNT}

	exit $exit_code
}
################################################################################
# main
rc=0
ip li add cgrp2_sock type dummy 2>/dev/null
set -e
mkdir -p ${CGRP_MNT}
mount -t cgroup2 none ${CGRP_MNT}
set +e
# make sure we have a known start point
cleanup 2>/dev/null cleanup 2>/dev/null
config_device
attach_bpf mkdir -p ${CGRP_MNT}/sockopts
do_test [ $? -ne 0 ] && cleanup_and_exit 1 "Failed to create cgroup hierarchy"
cleanup
echo "*** PASS ***"
# set pid into cgroup
echo $$ > ${CGRP_MNT}/sockopts/cgroup.procs
# no bpf program attached, so socket should show no settings
check_sock "dev , mark 0, priority 0" "No programs attached"
check_sock6 "dev , mark 0, priority 0" "No programs attached"
# verify device is set
#
test_cgrp2_sock -b cgrp2_sock ${CGRP_MNT}/sockopts
if [ $? -ne 0 ]; then
cleanup_and_exit 1 "Failed to install program to set device"
fi
check_sock "dev cgrp2_sock, mark 0, priority 0" "Device set"
check_sock6 "dev cgrp2_sock, mark 0, priority 0" "Device set"
# verify mark is set
#
test_cgrp2_sock -m 666 ${CGRP_MNT}/sockopts
if [ $? -ne 0 ]; then
cleanup_and_exit 1 "Failed to install program to set mark"
fi
check_sock "dev , mark 666, priority 0" "Mark set"
check_sock6 "dev , mark 666, priority 0" "Mark set"
# verify priority is set
#
test_cgrp2_sock -p 123 ${CGRP_MNT}/sockopts
if [ $? -ne 0 ]; then
cleanup_and_exit 1 "Failed to install program to set priority"
fi
check_sock "dev , mark 0, priority 123" "Priority set"
check_sock6 "dev , mark 0, priority 123" "Priority set"
# all 3 at once: attach one program that sets device, mark and priority,
# then check that new IPv4 and IPv6 sockets report all three values
#
test_cgrp2_sock -b cgrp2_sock -m 666 -p 123 ${CGRP_MNT}/sockopts
if [ $? -ne 0 ]; then
	cleanup_and_exit 1 "Failed to install program to set device, mark and priority"
fi
# use a distinct label so these results are not confused with the
# priority-only checks above
check_sock "dev cgrp2_sock, mark 666, priority 123" "Device, mark and priority set"
check_sock6 "dev cgrp2_sock, mark 666, priority 123" "Device, mark and priority set"
cleanup_and_exit $rc
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment